{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 20676, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 21.420076370239258, "learning_rate": 2.4154589371980672e-09, "loss": 1.1397, "step": 1 }, { "epoch": 0.0, "grad_norm": 19.92348861694336, "learning_rate": 4.8309178743961344e-09, "loss": 0.9482, "step": 2 }, { "epoch": 0.0, "grad_norm": 20.25980567932129, "learning_rate": 7.246376811594203e-09, "loss": 1.0392, "step": 3 }, { "epoch": 0.0, "grad_norm": 22.879924774169922, "learning_rate": 9.661835748792269e-09, "loss": 1.0585, "step": 4 }, { "epoch": 0.0, "grad_norm": 20.362548828125, "learning_rate": 1.2077294685990337e-08, "loss": 1.0468, "step": 5 }, { "epoch": 0.0, "grad_norm": 20.90379524230957, "learning_rate": 1.4492753623188406e-08, "loss": 1.0751, "step": 6 }, { "epoch": 0.0, "grad_norm": 19.7142391204834, "learning_rate": 1.6908212560386473e-08, "loss": 1.0701, "step": 7 }, { "epoch": 0.0, "grad_norm": 21.943117141723633, "learning_rate": 1.9323671497584538e-08, "loss": 0.8675, "step": 8 }, { "epoch": 0.0, "grad_norm": 18.573280334472656, "learning_rate": 2.1739130434782606e-08, "loss": 1.0141, "step": 9 }, { "epoch": 0.0, "grad_norm": 20.725828170776367, "learning_rate": 2.4154589371980675e-08, "loss": 0.9936, "step": 10 }, { "epoch": 0.0, "grad_norm": 19.45916748046875, "learning_rate": 2.6570048309178743e-08, "loss": 1.0452, "step": 11 }, { "epoch": 0.0, "grad_norm": 20.74947738647461, "learning_rate": 2.898550724637681e-08, "loss": 1.0159, "step": 12 }, { "epoch": 0.0, "grad_norm": 21.77608299255371, "learning_rate": 3.1400966183574877e-08, "loss": 0.9241, "step": 13 }, { "epoch": 0.0, "grad_norm": 23.113662719726562, "learning_rate": 3.3816425120772945e-08, "loss": 1.1344, "step": 14 }, { "epoch": 0.0, "grad_norm": 20.3964900970459, "learning_rate": 3.6231884057971014e-08, "loss": 1.045, "step": 15 }, { "epoch": 0.0, "grad_norm": 22.764055252075195, "learning_rate": 3.8647342995169075e-08, "loss": 1.0774, "step": 16 }, { "epoch": 0.0, "grad_norm": 21.83795738220215, "learning_rate": 4.106280193236715e-08, "loss": 0.9682, "step": 17 }, { "epoch": 0.0, "grad_norm": 22.566574096679688, "learning_rate": 4.347826086956521e-08, "loss": 1.0175, "step": 18 }, { "epoch": 0.0, "grad_norm": 20.925960540771484, "learning_rate": 4.589371980676329e-08, "loss": 0.9738, "step": 19 }, { "epoch": 0.0, "grad_norm": 17.796499252319336, "learning_rate": 4.830917874396135e-08, "loss": 1.116, "step": 20 }, { "epoch": 0.0, "grad_norm": 19.124238967895508, "learning_rate": 5.0724637681159424e-08, "loss": 0.9856, "step": 21 }, { "epoch": 0.0, "grad_norm": 17.118656158447266, "learning_rate": 5.3140096618357486e-08, "loss": 1.0162, "step": 22 }, { "epoch": 0.0, "grad_norm": 18.29786491394043, "learning_rate": 5.555555555555555e-08, "loss": 0.978, "step": 23 }, { "epoch": 0.0, "grad_norm": 18.274396896362305, "learning_rate": 5.797101449275362e-08, "loss": 0.9908, "step": 24 }, { "epoch": 0.0, "grad_norm": 21.422271728515625, "learning_rate": 6.038647342995169e-08, "loss": 1.0138, "step": 25 }, { "epoch": 0.0, "grad_norm": 19.95953369140625, "learning_rate": 6.280193236714975e-08, "loss": 1.0377, "step": 26 }, { "epoch": 0.0, "grad_norm": 17.56603240966797, "learning_rate": 6.521739130434782e-08, "loss": 1.009, "step": 27 }, { "epoch": 0.0, "grad_norm": 18.29461097717285, "learning_rate": 6.763285024154589e-08, "loss": 1.0258, "step": 28 }, { "epoch": 0.0, "grad_norm": 18.132652282714844, "learning_rate": 7.004830917874397e-08, "loss": 1.0442, "step": 29 }, { "epoch": 0.0, "grad_norm": 14.703685760498047, "learning_rate": 7.246376811594203e-08, "loss": 0.9185, "step": 30 }, { "epoch": 0.0, "grad_norm": 16.729921340942383, "learning_rate": 7.487922705314009e-08, "loss": 1.1168, "step": 31 }, { "epoch": 0.0, "grad_norm": 15.839676856994629, "learning_rate": 7.729468599033815e-08, "loss": 1.0137, "step": 32 }, { "epoch": 0.0, "grad_norm": 13.566006660461426, "learning_rate": 7.971014492753623e-08, "loss": 0.9944, "step": 33 }, { "epoch": 0.0, "grad_norm": 13.558852195739746, "learning_rate": 8.21256038647343e-08, "loss": 1.0193, "step": 34 }, { "epoch": 0.01, "grad_norm": 13.926807403564453, "learning_rate": 8.454106280193236e-08, "loss": 1.0222, "step": 35 }, { "epoch": 0.01, "grad_norm": 14.116297721862793, "learning_rate": 8.695652173913042e-08, "loss": 1.0069, "step": 36 }, { "epoch": 0.01, "grad_norm": 14.168923377990723, "learning_rate": 8.937198067632849e-08, "loss": 1.0327, "step": 37 }, { "epoch": 0.01, "grad_norm": 13.329092025756836, "learning_rate": 9.178743961352657e-08, "loss": 1.0153, "step": 38 }, { "epoch": 0.01, "grad_norm": 12.218950271606445, "learning_rate": 9.420289855072464e-08, "loss": 0.9944, "step": 39 }, { "epoch": 0.01, "grad_norm": 11.966972351074219, "learning_rate": 9.66183574879227e-08, "loss": 0.9794, "step": 40 }, { "epoch": 0.01, "grad_norm": 13.545537948608398, "learning_rate": 9.903381642512076e-08, "loss": 0.9898, "step": 41 }, { "epoch": 0.01, "grad_norm": 12.463408470153809, "learning_rate": 1.0144927536231885e-07, "loss": 0.9673, "step": 42 }, { "epoch": 0.01, "grad_norm": 12.92514705657959, "learning_rate": 1.0386473429951691e-07, "loss": 0.9094, "step": 43 }, { "epoch": 0.01, "grad_norm": 13.297220230102539, "learning_rate": 1.0628019323671497e-07, "loss": 1.02, "step": 44 }, { "epoch": 0.01, "grad_norm": 13.21561336517334, "learning_rate": 1.0869565217391303e-07, "loss": 1.003, "step": 45 }, { "epoch": 0.01, "grad_norm": 12.29727554321289, "learning_rate": 1.111111111111111e-07, "loss": 0.9887, "step": 46 }, { "epoch": 0.01, "grad_norm": 11.532038688659668, "learning_rate": 1.1352657004830918e-07, "loss": 0.8964, "step": 47 }, { "epoch": 0.01, "grad_norm": 11.909180641174316, "learning_rate": 1.1594202898550725e-07, "loss": 1.0198, "step": 48 }, { "epoch": 0.01, "grad_norm": 10.330426216125488, "learning_rate": 1.1835748792270531e-07, "loss": 0.7951, "step": 49 }, { "epoch": 0.01, "grad_norm": 10.107664108276367, "learning_rate": 1.2077294685990338e-07, "loss": 0.912, "step": 50 }, { "epoch": 0.01, "grad_norm": 10.514517784118652, "learning_rate": 1.2318840579710146e-07, "loss": 0.9607, "step": 51 }, { "epoch": 0.01, "grad_norm": 9.973725318908691, "learning_rate": 1.256038647342995e-07, "loss": 0.8826, "step": 52 }, { "epoch": 0.01, "grad_norm": 9.599990844726562, "learning_rate": 1.2801932367149758e-07, "loss": 0.8916, "step": 53 }, { "epoch": 0.01, "grad_norm": 10.56980037689209, "learning_rate": 1.3043478260869563e-07, "loss": 0.9329, "step": 54 }, { "epoch": 0.01, "grad_norm": 9.967477798461914, "learning_rate": 1.3285024154589373e-07, "loss": 0.9608, "step": 55 }, { "epoch": 0.01, "grad_norm": 10.299619674682617, "learning_rate": 1.3526570048309178e-07, "loss": 0.7767, "step": 56 }, { "epoch": 0.01, "grad_norm": 10.701433181762695, "learning_rate": 1.3768115942028986e-07, "loss": 0.7645, "step": 57 }, { "epoch": 0.01, "grad_norm": 10.680046081542969, "learning_rate": 1.4009661835748793e-07, "loss": 0.9758, "step": 58 }, { "epoch": 0.01, "grad_norm": 10.038948059082031, "learning_rate": 1.4251207729468598e-07, "loss": 0.9875, "step": 59 }, { "epoch": 0.01, "grad_norm": 8.708356857299805, "learning_rate": 1.4492753623188405e-07, "loss": 0.9306, "step": 60 }, { "epoch": 0.01, "grad_norm": 9.795671463012695, "learning_rate": 1.473429951690821e-07, "loss": 0.9287, "step": 61 }, { "epoch": 0.01, "grad_norm": 9.637264251708984, "learning_rate": 1.4975845410628018e-07, "loss": 0.8451, "step": 62 }, { "epoch": 0.01, "grad_norm": 9.72253131866455, "learning_rate": 1.5217391304347825e-07, "loss": 0.8673, "step": 63 }, { "epoch": 0.01, "grad_norm": 8.811726570129395, "learning_rate": 1.545893719806763e-07, "loss": 0.8816, "step": 64 }, { "epoch": 0.01, "grad_norm": 9.467260360717773, "learning_rate": 1.570048309178744e-07, "loss": 0.8609, "step": 65 }, { "epoch": 0.01, "grad_norm": 9.82490348815918, "learning_rate": 1.5942028985507245e-07, "loss": 0.8505, "step": 66 }, { "epoch": 0.01, "grad_norm": 9.594671249389648, "learning_rate": 1.6183574879227053e-07, "loss": 0.903, "step": 67 }, { "epoch": 0.01, "grad_norm": 9.156904220581055, "learning_rate": 1.642512077294686e-07, "loss": 0.9271, "step": 68 }, { "epoch": 0.01, "grad_norm": 8.807597160339355, "learning_rate": 1.6666666666666665e-07, "loss": 0.8276, "step": 69 }, { "epoch": 0.01, "grad_norm": 10.44106388092041, "learning_rate": 1.6908212560386473e-07, "loss": 0.8674, "step": 70 }, { "epoch": 0.01, "grad_norm": 9.574630737304688, "learning_rate": 1.714975845410628e-07, "loss": 0.8493, "step": 71 }, { "epoch": 0.01, "grad_norm": 9.69957160949707, "learning_rate": 1.7391304347826085e-07, "loss": 0.9028, "step": 72 }, { "epoch": 0.01, "grad_norm": 8.944463729858398, "learning_rate": 1.7632850241545892e-07, "loss": 0.9243, "step": 73 }, { "epoch": 0.01, "grad_norm": 9.061956405639648, "learning_rate": 1.7874396135265697e-07, "loss": 0.8865, "step": 74 }, { "epoch": 0.01, "grad_norm": 9.553507804870605, "learning_rate": 1.8115942028985507e-07, "loss": 0.8438, "step": 75 }, { "epoch": 0.01, "grad_norm": 8.711469650268555, "learning_rate": 1.8357487922705315e-07, "loss": 0.944, "step": 76 }, { "epoch": 0.01, "grad_norm": 10.09415054321289, "learning_rate": 1.859903381642512e-07, "loss": 0.7956, "step": 77 }, { "epoch": 0.01, "grad_norm": 10.040940284729004, "learning_rate": 1.8840579710144927e-07, "loss": 0.7811, "step": 78 }, { "epoch": 0.01, "grad_norm": 8.437114715576172, "learning_rate": 1.9082125603864732e-07, "loss": 0.84, "step": 79 }, { "epoch": 0.01, "grad_norm": 8.769733428955078, "learning_rate": 1.932367149758454e-07, "loss": 0.8377, "step": 80 }, { "epoch": 0.01, "grad_norm": 8.552127838134766, "learning_rate": 1.9565217391304347e-07, "loss": 0.8685, "step": 81 }, { "epoch": 0.01, "grad_norm": 8.997797012329102, "learning_rate": 1.9806763285024152e-07, "loss": 0.8874, "step": 82 }, { "epoch": 0.01, "grad_norm": 7.961603164672852, "learning_rate": 2.004830917874396e-07, "loss": 0.8079, "step": 83 }, { "epoch": 0.01, "grad_norm": 9.955812454223633, "learning_rate": 2.028985507246377e-07, "loss": 0.9327, "step": 84 }, { "epoch": 0.01, "grad_norm": 8.57805061340332, "learning_rate": 2.0531400966183575e-07, "loss": 0.8594, "step": 85 }, { "epoch": 0.01, "grad_norm": 9.257073402404785, "learning_rate": 2.0772946859903382e-07, "loss": 0.888, "step": 86 }, { "epoch": 0.01, "grad_norm": 10.347823143005371, "learning_rate": 2.1014492753623187e-07, "loss": 0.9949, "step": 87 }, { "epoch": 0.01, "grad_norm": 9.073945999145508, "learning_rate": 2.1256038647342994e-07, "loss": 0.9098, "step": 88 }, { "epoch": 0.01, "grad_norm": 9.005078315734863, "learning_rate": 2.1497584541062802e-07, "loss": 0.917, "step": 89 }, { "epoch": 0.01, "grad_norm": 8.916098594665527, "learning_rate": 2.1739130434782607e-07, "loss": 0.8744, "step": 90 }, { "epoch": 0.01, "grad_norm": 8.223121643066406, "learning_rate": 2.1980676328502414e-07, "loss": 0.8965, "step": 91 }, { "epoch": 0.01, "grad_norm": 8.649352073669434, "learning_rate": 2.222222222222222e-07, "loss": 0.832, "step": 92 }, { "epoch": 0.01, "grad_norm": 9.941629409790039, "learning_rate": 2.2463768115942027e-07, "loss": 0.9393, "step": 93 }, { "epoch": 0.01, "grad_norm": 9.5707426071167, "learning_rate": 2.2705314009661837e-07, "loss": 0.8249, "step": 94 }, { "epoch": 0.01, "grad_norm": 8.948908805847168, "learning_rate": 2.2946859903381642e-07, "loss": 0.8937, "step": 95 }, { "epoch": 0.01, "grad_norm": 8.956595420837402, "learning_rate": 2.318840579710145e-07, "loss": 0.8297, "step": 96 }, { "epoch": 0.01, "grad_norm": 7.984899044036865, "learning_rate": 2.3429951690821257e-07, "loss": 0.8199, "step": 97 }, { "epoch": 0.01, "grad_norm": 9.659669876098633, "learning_rate": 2.3671497584541062e-07, "loss": 0.8399, "step": 98 }, { "epoch": 0.01, "grad_norm": 8.45025634765625, "learning_rate": 2.391304347826087e-07, "loss": 0.8741, "step": 99 }, { "epoch": 0.01, "grad_norm": 10.280787467956543, "learning_rate": 2.4154589371980677e-07, "loss": 0.8696, "step": 100 }, { "epoch": 0.01, "grad_norm": 8.964407920837402, "learning_rate": 2.439613526570048e-07, "loss": 0.8182, "step": 101 }, { "epoch": 0.01, "grad_norm": 10.540839195251465, "learning_rate": 2.463768115942029e-07, "loss": 0.9163, "step": 102 }, { "epoch": 0.01, "grad_norm": 8.106494903564453, "learning_rate": 2.4879227053140096e-07, "loss": 0.7896, "step": 103 }, { "epoch": 0.02, "grad_norm": 9.102949142456055, "learning_rate": 2.51207729468599e-07, "loss": 0.7891, "step": 104 }, { "epoch": 0.02, "grad_norm": 8.137986183166504, "learning_rate": 2.536231884057971e-07, "loss": 0.7648, "step": 105 }, { "epoch": 0.02, "grad_norm": 8.467187881469727, "learning_rate": 2.5603864734299516e-07, "loss": 0.8231, "step": 106 }, { "epoch": 0.02, "grad_norm": 8.895845413208008, "learning_rate": 2.584541062801932e-07, "loss": 0.8079, "step": 107 }, { "epoch": 0.02, "grad_norm": 8.70427131652832, "learning_rate": 2.6086956521739126e-07, "loss": 0.8627, "step": 108 }, { "epoch": 0.02, "grad_norm": 9.124686241149902, "learning_rate": 2.6328502415458936e-07, "loss": 0.8725, "step": 109 }, { "epoch": 0.02, "grad_norm": 8.425718307495117, "learning_rate": 2.6570048309178746e-07, "loss": 0.8132, "step": 110 }, { "epoch": 0.02, "grad_norm": 8.200350761413574, "learning_rate": 2.681159420289855e-07, "loss": 0.7611, "step": 111 }, { "epoch": 0.02, "grad_norm": 8.239806175231934, "learning_rate": 2.7053140096618356e-07, "loss": 0.7478, "step": 112 }, { "epoch": 0.02, "grad_norm": 8.832508087158203, "learning_rate": 2.729468599033816e-07, "loss": 0.7769, "step": 113 }, { "epoch": 0.02, "grad_norm": 9.051953315734863, "learning_rate": 2.753623188405797e-07, "loss": 0.7673, "step": 114 }, { "epoch": 0.02, "grad_norm": 8.524765968322754, "learning_rate": 2.7777777777777776e-07, "loss": 0.7957, "step": 115 }, { "epoch": 0.02, "grad_norm": 9.411301612854004, "learning_rate": 2.8019323671497586e-07, "loss": 0.8387, "step": 116 }, { "epoch": 0.02, "grad_norm": 8.292466163635254, "learning_rate": 2.8260869565217386e-07, "loss": 0.8987, "step": 117 }, { "epoch": 0.02, "grad_norm": 9.067767143249512, "learning_rate": 2.8502415458937196e-07, "loss": 0.8202, "step": 118 }, { "epoch": 0.02, "grad_norm": 8.500149726867676, "learning_rate": 2.8743961352657006e-07, "loss": 0.775, "step": 119 }, { "epoch": 0.02, "grad_norm": 7.541442394256592, "learning_rate": 2.898550724637681e-07, "loss": 0.8389, "step": 120 }, { "epoch": 0.02, "grad_norm": 9.100056648254395, "learning_rate": 2.922705314009662e-07, "loss": 0.8795, "step": 121 }, { "epoch": 0.02, "grad_norm": 9.255707740783691, "learning_rate": 2.946859903381642e-07, "loss": 0.8542, "step": 122 }, { "epoch": 0.02, "grad_norm": 10.698966026306152, "learning_rate": 2.971014492753623e-07, "loss": 0.8989, "step": 123 }, { "epoch": 0.02, "grad_norm": 8.189776420593262, "learning_rate": 2.9951690821256036e-07, "loss": 0.861, "step": 124 }, { "epoch": 0.02, "grad_norm": 9.577753067016602, "learning_rate": 3.0193236714975846e-07, "loss": 0.8581, "step": 125 }, { "epoch": 0.02, "grad_norm": 9.846701622009277, "learning_rate": 3.043478260869565e-07, "loss": 0.7974, "step": 126 }, { "epoch": 0.02, "grad_norm": 8.673282623291016, "learning_rate": 3.0676328502415455e-07, "loss": 0.8081, "step": 127 }, { "epoch": 0.02, "grad_norm": 8.500358581542969, "learning_rate": 3.091787439613526e-07, "loss": 0.7539, "step": 128 }, { "epoch": 0.02, "grad_norm": 8.834930419921875, "learning_rate": 3.115942028985507e-07, "loss": 0.8998, "step": 129 }, { "epoch": 0.02, "grad_norm": 7.188091278076172, "learning_rate": 3.140096618357488e-07, "loss": 0.7509, "step": 130 }, { "epoch": 0.02, "grad_norm": 8.585405349731445, "learning_rate": 3.1642512077294685e-07, "loss": 0.7659, "step": 131 }, { "epoch": 0.02, "grad_norm": 9.34982681274414, "learning_rate": 3.188405797101449e-07, "loss": 0.8061, "step": 132 }, { "epoch": 0.02, "grad_norm": 8.905574798583984, "learning_rate": 3.2125603864734295e-07, "loss": 0.8531, "step": 133 }, { "epoch": 0.02, "grad_norm": 8.978387832641602, "learning_rate": 3.2367149758454105e-07, "loss": 0.8199, "step": 134 }, { "epoch": 0.02, "grad_norm": 7.521595478057861, "learning_rate": 3.260869565217391e-07, "loss": 0.7723, "step": 135 }, { "epoch": 0.02, "grad_norm": 8.698850631713867, "learning_rate": 3.285024154589372e-07, "loss": 0.8159, "step": 136 }, { "epoch": 0.02, "grad_norm": 8.7655668258667, "learning_rate": 3.3091787439613525e-07, "loss": 0.8477, "step": 137 }, { "epoch": 0.02, "grad_norm": 8.569133758544922, "learning_rate": 3.333333333333333e-07, "loss": 0.8469, "step": 138 }, { "epoch": 0.02, "grad_norm": 9.362787246704102, "learning_rate": 3.357487922705314e-07, "loss": 0.7423, "step": 139 }, { "epoch": 0.02, "grad_norm": 8.546152114868164, "learning_rate": 3.3816425120772945e-07, "loss": 0.7333, "step": 140 }, { "epoch": 0.02, "grad_norm": 8.649870872497559, "learning_rate": 3.4057971014492755e-07, "loss": 0.8872, "step": 141 }, { "epoch": 0.02, "grad_norm": 10.972417831420898, "learning_rate": 3.429951690821256e-07, "loss": 0.8893, "step": 142 }, { "epoch": 0.02, "grad_norm": 8.826498031616211, "learning_rate": 3.4541062801932365e-07, "loss": 0.9027, "step": 143 }, { "epoch": 0.02, "grad_norm": 9.915367126464844, "learning_rate": 3.478260869565217e-07, "loss": 0.8362, "step": 144 }, { "epoch": 0.02, "grad_norm": 8.46672534942627, "learning_rate": 3.502415458937198e-07, "loss": 0.8045, "step": 145 }, { "epoch": 0.02, "grad_norm": 8.552976608276367, "learning_rate": 3.5265700483091785e-07, "loss": 0.7964, "step": 146 }, { "epoch": 0.02, "grad_norm": 8.812414169311523, "learning_rate": 3.5507246376811595e-07, "loss": 0.8582, "step": 147 }, { "epoch": 0.02, "grad_norm": 8.301215171813965, "learning_rate": 3.5748792270531395e-07, "loss": 0.8361, "step": 148 }, { "epoch": 0.02, "grad_norm": 8.037793159484863, "learning_rate": 3.5990338164251205e-07, "loss": 0.8236, "step": 149 }, { "epoch": 0.02, "grad_norm": 8.59532642364502, "learning_rate": 3.6231884057971015e-07, "loss": 0.9185, "step": 150 }, { "epoch": 0.02, "grad_norm": 9.251195907592773, "learning_rate": 3.647342995169082e-07, "loss": 0.7921, "step": 151 }, { "epoch": 0.02, "grad_norm": 9.080644607543945, "learning_rate": 3.671497584541063e-07, "loss": 0.8008, "step": 152 }, { "epoch": 0.02, "grad_norm": 8.119750022888184, "learning_rate": 3.695652173913043e-07, "loss": 0.7793, "step": 153 }, { "epoch": 0.02, "grad_norm": 9.260024070739746, "learning_rate": 3.719806763285024e-07, "loss": 0.7699, "step": 154 }, { "epoch": 0.02, "grad_norm": 9.126934051513672, "learning_rate": 3.7439613526570044e-07, "loss": 0.81, "step": 155 }, { "epoch": 0.02, "grad_norm": 8.056917190551758, "learning_rate": 3.7681159420289855e-07, "loss": 0.8921, "step": 156 }, { "epoch": 0.02, "grad_norm": 9.474838256835938, "learning_rate": 3.792270531400966e-07, "loss": 0.8915, "step": 157 }, { "epoch": 0.02, "grad_norm": 8.348868370056152, "learning_rate": 3.8164251207729464e-07, "loss": 0.9005, "step": 158 }, { "epoch": 0.02, "grad_norm": 8.52780532836914, "learning_rate": 3.8405797101449274e-07, "loss": 0.7696, "step": 159 }, { "epoch": 0.02, "grad_norm": 8.567625999450684, "learning_rate": 3.864734299516908e-07, "loss": 0.8055, "step": 160 }, { "epoch": 0.02, "grad_norm": 8.763707160949707, "learning_rate": 3.888888888888889e-07, "loss": 0.8305, "step": 161 }, { "epoch": 0.02, "grad_norm": 7.65728235244751, "learning_rate": 3.9130434782608694e-07, "loss": 0.8064, "step": 162 }, { "epoch": 0.02, "grad_norm": 8.464720726013184, "learning_rate": 3.9371980676328504e-07, "loss": 0.8697, "step": 163 }, { "epoch": 0.02, "grad_norm": 9.028409004211426, "learning_rate": 3.9613526570048304e-07, "loss": 0.8115, "step": 164 }, { "epoch": 0.02, "grad_norm": 8.45876407623291, "learning_rate": 3.9855072463768114e-07, "loss": 0.7966, "step": 165 }, { "epoch": 0.02, "grad_norm": 8.423782348632812, "learning_rate": 4.009661835748792e-07, "loss": 0.9357, "step": 166 }, { "epoch": 0.02, "grad_norm": 7.707049369812012, "learning_rate": 4.033816425120773e-07, "loss": 0.7092, "step": 167 }, { "epoch": 0.02, "grad_norm": 8.197909355163574, "learning_rate": 4.057971014492754e-07, "loss": 0.9184, "step": 168 }, { "epoch": 0.02, "grad_norm": 8.019355773925781, "learning_rate": 4.082125603864734e-07, "loss": 0.8726, "step": 169 }, { "epoch": 0.02, "grad_norm": 9.610885620117188, "learning_rate": 4.106280193236715e-07, "loss": 0.7437, "step": 170 }, { "epoch": 0.02, "grad_norm": 8.083844184875488, "learning_rate": 4.1304347826086954e-07, "loss": 0.7492, "step": 171 }, { "epoch": 0.02, "grad_norm": 9.068297386169434, "learning_rate": 4.1545893719806764e-07, "loss": 0.7809, "step": 172 }, { "epoch": 0.03, "grad_norm": 8.125038146972656, "learning_rate": 4.178743961352657e-07, "loss": 0.8496, "step": 173 }, { "epoch": 0.03, "grad_norm": 8.512014389038086, "learning_rate": 4.2028985507246374e-07, "loss": 0.7883, "step": 174 }, { "epoch": 0.03, "grad_norm": 8.38894271850586, "learning_rate": 4.227053140096618e-07, "loss": 0.7952, "step": 175 }, { "epoch": 0.03, "grad_norm": 9.192437171936035, "learning_rate": 4.251207729468599e-07, "loss": 0.787, "step": 176 }, { "epoch": 0.03, "grad_norm": 8.67406940460205, "learning_rate": 4.2753623188405794e-07, "loss": 0.703, "step": 177 }, { "epoch": 0.03, "grad_norm": 8.721837997436523, "learning_rate": 4.2995169082125604e-07, "loss": 0.8147, "step": 178 }, { "epoch": 0.03, "grad_norm": 9.606791496276855, "learning_rate": 4.323671497584541e-07, "loss": 0.7629, "step": 179 }, { "epoch": 0.03, "grad_norm": 8.251015663146973, "learning_rate": 4.3478260869565214e-07, "loss": 0.7967, "step": 180 }, { "epoch": 0.03, "grad_norm": 8.674221992492676, "learning_rate": 4.3719806763285024e-07, "loss": 0.775, "step": 181 }, { "epoch": 0.03, "grad_norm": 9.3303861618042, "learning_rate": 4.396135265700483e-07, "loss": 0.8133, "step": 182 }, { "epoch": 0.03, "grad_norm": 8.599987030029297, "learning_rate": 4.420289855072464e-07, "loss": 0.8875, "step": 183 }, { "epoch": 0.03, "grad_norm": 9.014764785766602, "learning_rate": 4.444444444444444e-07, "loss": 0.8038, "step": 184 }, { "epoch": 0.03, "grad_norm": 7.556729316711426, "learning_rate": 4.468599033816425e-07, "loss": 0.7665, "step": 185 }, { "epoch": 0.03, "grad_norm": 8.240182876586914, "learning_rate": 4.4927536231884053e-07, "loss": 0.8881, "step": 186 }, { "epoch": 0.03, "grad_norm": 8.470274925231934, "learning_rate": 4.5169082125603863e-07, "loss": 0.8457, "step": 187 }, { "epoch": 0.03, "grad_norm": 8.399374961853027, "learning_rate": 4.5410628019323674e-07, "loss": 0.8123, "step": 188 }, { "epoch": 0.03, "grad_norm": 8.690375328063965, "learning_rate": 4.5652173913043473e-07, "loss": 0.8208, "step": 189 }, { "epoch": 0.03, "grad_norm": 9.119149208068848, "learning_rate": 4.5893719806763283e-07, "loss": 0.7211, "step": 190 }, { "epoch": 0.03, "grad_norm": 9.17790699005127, "learning_rate": 4.613526570048309e-07, "loss": 0.7366, "step": 191 }, { "epoch": 0.03, "grad_norm": 7.789165496826172, "learning_rate": 4.63768115942029e-07, "loss": 0.7395, "step": 192 }, { "epoch": 0.03, "grad_norm": 8.209280967712402, "learning_rate": 4.6618357487922703e-07, "loss": 0.7982, "step": 193 }, { "epoch": 0.03, "grad_norm": 9.280526161193848, "learning_rate": 4.6859903381642513e-07, "loss": 0.8009, "step": 194 }, { "epoch": 0.03, "grad_norm": 9.319620132446289, "learning_rate": 4.7101449275362313e-07, "loss": 0.8486, "step": 195 }, { "epoch": 0.03, "grad_norm": 8.111810684204102, "learning_rate": 4.7342995169082123e-07, "loss": 0.808, "step": 196 }, { "epoch": 0.03, "grad_norm": 9.142539024353027, "learning_rate": 4.758454106280193e-07, "loss": 0.8234, "step": 197 }, { "epoch": 0.03, "grad_norm": 9.05260944366455, "learning_rate": 4.782608695652174e-07, "loss": 0.882, "step": 198 }, { "epoch": 0.03, "grad_norm": 8.357937812805176, "learning_rate": 4.806763285024155e-07, "loss": 0.7519, "step": 199 }, { "epoch": 0.03, "grad_norm": 8.63204288482666, "learning_rate": 4.830917874396135e-07, "loss": 0.8145, "step": 200 }, { "epoch": 0.03, "grad_norm": 9.772710800170898, "learning_rate": 4.855072463768116e-07, "loss": 0.8654, "step": 201 }, { "epoch": 0.03, "grad_norm": 9.255741119384766, "learning_rate": 4.879227053140096e-07, "loss": 0.8244, "step": 202 }, { "epoch": 0.03, "grad_norm": 8.14548110961914, "learning_rate": 4.903381642512077e-07, "loss": 0.8868, "step": 203 }, { "epoch": 0.03, "grad_norm": 9.03281307220459, "learning_rate": 4.927536231884058e-07, "loss": 0.7692, "step": 204 }, { "epoch": 0.03, "grad_norm": 7.9723944664001465, "learning_rate": 4.951690821256038e-07, "loss": 0.8166, "step": 205 }, { "epoch": 0.03, "grad_norm": 8.161651611328125, "learning_rate": 4.975845410628019e-07, "loss": 0.8158, "step": 206 }, { "epoch": 0.03, "grad_norm": 8.701957702636719, "learning_rate": 5e-07, "loss": 0.8867, "step": 207 }, { "epoch": 0.03, "grad_norm": 9.932840347290039, "learning_rate": 5.02415458937198e-07, "loss": 0.8422, "step": 208 }, { "epoch": 0.03, "grad_norm": 9.706363677978516, "learning_rate": 5.048309178743962e-07, "loss": 0.8824, "step": 209 }, { "epoch": 0.03, "grad_norm": 8.833113670349121, "learning_rate": 5.072463768115942e-07, "loss": 0.8531, "step": 210 }, { "epoch": 0.03, "grad_norm": 8.928537368774414, "learning_rate": 5.096618357487923e-07, "loss": 0.7253, "step": 211 }, { "epoch": 0.03, "grad_norm": 8.82126522064209, "learning_rate": 5.120772946859903e-07, "loss": 0.7976, "step": 212 }, { "epoch": 0.03, "grad_norm": 8.801895141601562, "learning_rate": 5.144927536231884e-07, "loss": 0.9054, "step": 213 }, { "epoch": 0.03, "grad_norm": 9.30174446105957, "learning_rate": 5.169082125603864e-07, "loss": 0.8148, "step": 214 }, { "epoch": 0.03, "grad_norm": 8.194101333618164, "learning_rate": 5.193236714975845e-07, "loss": 0.7624, "step": 215 }, { "epoch": 0.03, "grad_norm": 8.473793983459473, "learning_rate": 5.217391304347825e-07, "loss": 0.8404, "step": 216 }, { "epoch": 0.03, "grad_norm": 8.788778305053711, "learning_rate": 5.241545893719807e-07, "loss": 0.7727, "step": 217 }, { "epoch": 0.03, "grad_norm": 8.643074035644531, "learning_rate": 5.265700483091787e-07, "loss": 0.8312, "step": 218 }, { "epoch": 0.03, "grad_norm": 8.50632381439209, "learning_rate": 5.289855072463768e-07, "loss": 0.6982, "step": 219 }, { "epoch": 0.03, "grad_norm": 8.401885032653809, "learning_rate": 5.314009661835749e-07, "loss": 0.7652, "step": 220 }, { "epoch": 0.03, "grad_norm": 8.55151081085205, "learning_rate": 5.33816425120773e-07, "loss": 0.7568, "step": 221 }, { "epoch": 0.03, "grad_norm": 8.773747444152832, "learning_rate": 5.36231884057971e-07, "loss": 0.8009, "step": 222 }, { "epoch": 0.03, "grad_norm": 8.991849899291992, "learning_rate": 5.38647342995169e-07, "loss": 0.7762, "step": 223 }, { "epoch": 0.03, "grad_norm": 7.908154010772705, "learning_rate": 5.410628019323671e-07, "loss": 0.7843, "step": 224 }, { "epoch": 0.03, "grad_norm": 8.66555118560791, "learning_rate": 5.434782608695652e-07, "loss": 0.7324, "step": 225 }, { "epoch": 0.03, "grad_norm": 8.64578914642334, "learning_rate": 5.458937198067632e-07, "loss": 0.7634, "step": 226 }, { "epoch": 0.03, "grad_norm": 8.616151809692383, "learning_rate": 5.483091787439614e-07, "loss": 0.914, "step": 227 }, { "epoch": 0.03, "grad_norm": 8.243027687072754, "learning_rate": 5.507246376811594e-07, "loss": 0.7316, "step": 228 }, { "epoch": 0.03, "grad_norm": 9.26878833770752, "learning_rate": 5.531400966183575e-07, "loss": 0.8136, "step": 229 }, { "epoch": 0.03, "grad_norm": 9.452293395996094, "learning_rate": 5.555555555555555e-07, "loss": 0.9193, "step": 230 }, { "epoch": 0.03, "grad_norm": 8.0909423828125, "learning_rate": 5.579710144927537e-07, "loss": 0.7767, "step": 231 }, { "epoch": 0.03, "grad_norm": 7.564353942871094, "learning_rate": 5.603864734299517e-07, "loss": 0.7404, "step": 232 }, { "epoch": 0.03, "grad_norm": 9.443838119506836, "learning_rate": 5.628019323671497e-07, "loss": 0.7807, "step": 233 }, { "epoch": 0.03, "grad_norm": 7.732743740081787, "learning_rate": 5.652173913043477e-07, "loss": 0.6764, "step": 234 }, { "epoch": 0.03, "grad_norm": 7.953059673309326, "learning_rate": 5.676328502415459e-07, "loss": 0.8363, "step": 235 }, { "epoch": 0.03, "grad_norm": 8.455744743347168, "learning_rate": 5.700483091787439e-07, "loss": 0.7672, "step": 236 }, { "epoch": 0.03, "grad_norm": 9.43114948272705, "learning_rate": 5.72463768115942e-07, "loss": 0.8747, "step": 237 }, { "epoch": 0.03, "grad_norm": 8.434456825256348, "learning_rate": 5.748792270531401e-07, "loss": 0.7892, "step": 238 }, { "epoch": 0.03, "grad_norm": 8.280929565429688, "learning_rate": 5.772946859903382e-07, "loss": 0.739, "step": 239 }, { "epoch": 0.03, "grad_norm": 8.247971534729004, "learning_rate": 5.797101449275362e-07, "loss": 0.8067, "step": 240 }, { "epoch": 0.03, "grad_norm": 8.452585220336914, "learning_rate": 5.821256038647343e-07, "loss": 0.8055, "step": 241 }, { "epoch": 0.04, "grad_norm": 8.986918449401855, "learning_rate": 5.845410628019324e-07, "loss": 0.7516, "step": 242 }, { "epoch": 0.04, "grad_norm": 8.732271194458008, "learning_rate": 5.869565217391305e-07, "loss": 0.899, "step": 243 }, { "epoch": 0.04, "grad_norm": 8.678370475769043, "learning_rate": 5.893719806763284e-07, "loss": 0.8014, "step": 244 }, { "epoch": 0.04, "grad_norm": 7.961592674255371, "learning_rate": 5.917874396135265e-07, "loss": 0.7859, "step": 245 }, { "epoch": 0.04, "grad_norm": 8.570819854736328, "learning_rate": 5.942028985507246e-07, "loss": 0.8238, "step": 246 }, { "epoch": 0.04, "grad_norm": 7.980477809906006, "learning_rate": 5.966183574879227e-07, "loss": 0.8427, "step": 247 }, { "epoch": 0.04, "grad_norm": 8.667784690856934, "learning_rate": 5.990338164251207e-07, "loss": 0.8241, "step": 248 }, { "epoch": 0.04, "grad_norm": 9.15971851348877, "learning_rate": 6.014492753623189e-07, "loss": 0.7951, "step": 249 }, { "epoch": 0.04, "grad_norm": 10.183513641357422, "learning_rate": 6.038647342995169e-07, "loss": 0.7434, "step": 250 }, { "epoch": 0.04, "grad_norm": 8.315301895141602, "learning_rate": 6.06280193236715e-07, "loss": 0.8038, "step": 251 }, { "epoch": 0.04, "grad_norm": 9.121587753295898, "learning_rate": 6.08695652173913e-07, "loss": 0.8327, "step": 252 }, { "epoch": 0.04, "grad_norm": 8.667826652526855, "learning_rate": 6.111111111111112e-07, "loss": 0.7897, "step": 253 }, { "epoch": 0.04, "grad_norm": 8.166550636291504, "learning_rate": 6.135265700483091e-07, "loss": 0.7938, "step": 254 }, { "epoch": 0.04, "grad_norm": 8.148499488830566, "learning_rate": 6.159420289855072e-07, "loss": 0.7695, "step": 255 }, { "epoch": 0.04, "grad_norm": 8.439364433288574, "learning_rate": 6.183574879227052e-07, "loss": 0.8312, "step": 256 }, { "epoch": 0.04, "grad_norm": 8.434760093688965, "learning_rate": 6.207729468599034e-07, "loss": 0.8221, "step": 257 }, { "epoch": 0.04, "grad_norm": 7.928536891937256, "learning_rate": 6.231884057971014e-07, "loss": 0.7746, "step": 258 }, { "epoch": 0.04, "grad_norm": 8.555005073547363, "learning_rate": 6.256038647342995e-07, "loss": 0.8432, "step": 259 }, { "epoch": 0.04, "grad_norm": 9.221419334411621, "learning_rate": 6.280193236714976e-07, "loss": 0.8394, "step": 260 }, { "epoch": 0.04, "grad_norm": 7.981441497802734, "learning_rate": 6.304347826086957e-07, "loss": 0.6983, "step": 261 }, { "epoch": 0.04, "grad_norm": 9.17795467376709, "learning_rate": 6.328502415458937e-07, "loss": 0.7815, "step": 262 }, { "epoch": 0.04, "grad_norm": 8.552322387695312, "learning_rate": 6.352657004830918e-07, "loss": 0.7596, "step": 263 }, { "epoch": 0.04, "grad_norm": 8.043282508850098, "learning_rate": 6.376811594202898e-07, "loss": 0.7018, "step": 264 }, { "epoch": 0.04, "grad_norm": 8.678237915039062, "learning_rate": 6.400966183574879e-07, "loss": 0.785, "step": 265 }, { "epoch": 0.04, "grad_norm": 9.85046100616455, "learning_rate": 6.425120772946859e-07, "loss": 0.8019, "step": 266 }, { "epoch": 0.04, "grad_norm": 8.122142791748047, "learning_rate": 6.449275362318841e-07, "loss": 0.7794, "step": 267 }, { "epoch": 0.04, "grad_norm": 8.355965614318848, "learning_rate": 6.473429951690821e-07, "loss": 0.8086, "step": 268 }, { "epoch": 0.04, "grad_norm": 8.617119789123535, "learning_rate": 6.497584541062802e-07, "loss": 0.8649, "step": 269 }, { "epoch": 0.04, "grad_norm": 7.484366416931152, "learning_rate": 6.521739130434782e-07, "loss": 0.7931, "step": 270 }, { "epoch": 0.04, "grad_norm": 9.053442001342773, "learning_rate": 6.545893719806764e-07, "loss": 0.8062, "step": 271 }, { "epoch": 0.04, "grad_norm": 9.630528450012207, "learning_rate": 6.570048309178744e-07, "loss": 0.7709, "step": 272 }, { "epoch": 0.04, "grad_norm": 8.154251098632812, "learning_rate": 6.594202898550725e-07, "loss": 0.7333, "step": 273 }, { "epoch": 0.04, "grad_norm": 10.27403450012207, "learning_rate": 6.618357487922705e-07, "loss": 0.8199, "step": 274 }, { "epoch": 0.04, "grad_norm": 7.920419216156006, "learning_rate": 6.642512077294686e-07, "loss": 0.784, "step": 275 }, { "epoch": 0.04, "grad_norm": 7.880988121032715, "learning_rate": 6.666666666666666e-07, "loss": 0.8378, "step": 276 }, { "epoch": 0.04, "grad_norm": 8.732653617858887, "learning_rate": 6.690821256038646e-07, "loss": 0.8204, "step": 277 }, { "epoch": 0.04, "grad_norm": 9.843984603881836, "learning_rate": 6.714975845410628e-07, "loss": 0.7962, "step": 278 }, { "epoch": 0.04, "grad_norm": 9.542441368103027, "learning_rate": 6.739130434782609e-07, "loss": 0.8517, "step": 279 }, { "epoch": 0.04, "grad_norm": 7.8393144607543945, "learning_rate": 6.763285024154589e-07, "loss": 0.7896, "step": 280 }, { "epoch": 0.04, "grad_norm": 8.42013931274414, "learning_rate": 6.78743961352657e-07, "loss": 0.8016, "step": 281 }, { "epoch": 0.04, "grad_norm": 9.74475383758545, "learning_rate": 6.811594202898551e-07, "loss": 0.9545, "step": 282 }, { "epoch": 0.04, "grad_norm": 8.477212905883789, "learning_rate": 6.835748792270532e-07, "loss": 0.8211, "step": 283 }, { "epoch": 0.04, "grad_norm": 9.34976577758789, "learning_rate": 6.859903381642512e-07, "loss": 0.7502, "step": 284 }, { "epoch": 0.04, "grad_norm": 8.182233810424805, "learning_rate": 6.884057971014491e-07, "loss": 0.7212, "step": 285 }, { "epoch": 0.04, "grad_norm": 8.569870948791504, "learning_rate": 6.908212560386473e-07, "loss": 0.8601, "step": 286 }, { "epoch": 0.04, "grad_norm": 7.950462818145752, "learning_rate": 6.932367149758453e-07, "loss": 0.7525, "step": 287 }, { "epoch": 0.04, "grad_norm": 7.821057319641113, "learning_rate": 6.956521739130434e-07, "loss": 0.7844, "step": 288 }, { "epoch": 0.04, "grad_norm": 8.433938026428223, "learning_rate": 6.980676328502416e-07, "loss": 0.911, "step": 289 }, { "epoch": 0.04, "grad_norm": 8.972312927246094, "learning_rate": 7.004830917874396e-07, "loss": 0.7852, "step": 290 }, { "epoch": 0.04, "grad_norm": 7.884095668792725, "learning_rate": 7.028985507246376e-07, "loss": 0.7349, "step": 291 }, { "epoch": 0.04, "grad_norm": 7.4659295082092285, "learning_rate": 7.053140096618357e-07, "loss": 0.7049, "step": 292 }, { "epoch": 0.04, "grad_norm": 9.135799407958984, "learning_rate": 7.077294685990339e-07, "loss": 0.7887, "step": 293 }, { "epoch": 0.04, "grad_norm": 8.925572395324707, "learning_rate": 7.101449275362319e-07, "loss": 0.8442, "step": 294 }, { "epoch": 0.04, "grad_norm": 9.90822696685791, "learning_rate": 7.125603864734298e-07, "loss": 0.8769, "step": 295 }, { "epoch": 0.04, "grad_norm": 9.237275123596191, "learning_rate": 7.149758454106279e-07, "loss": 0.7917, "step": 296 }, { "epoch": 0.04, "grad_norm": 7.742967128753662, "learning_rate": 7.17391304347826e-07, "loss": 0.7159, "step": 297 }, { "epoch": 0.04, "grad_norm": 7.952823162078857, "learning_rate": 7.198067632850241e-07, "loss": 0.8786, "step": 298 }, { "epoch": 0.04, "grad_norm": 8.460858345031738, "learning_rate": 7.222222222222221e-07, "loss": 0.8445, "step": 299 }, { "epoch": 0.04, "grad_norm": 8.103675842285156, "learning_rate": 7.246376811594203e-07, "loss": 0.8161, "step": 300 }, { "epoch": 0.04, "grad_norm": 8.317255973815918, "learning_rate": 7.270531400966183e-07, "loss": 0.7596, "step": 301 }, { "epoch": 0.04, "grad_norm": 7.400672435760498, "learning_rate": 7.294685990338164e-07, "loss": 0.7718, "step": 302 }, { "epoch": 0.04, "grad_norm": 7.704049110412598, "learning_rate": 7.318840579710144e-07, "loss": 0.6883, "step": 303 }, { "epoch": 0.04, "grad_norm": 8.50510025024414, "learning_rate": 7.342995169082126e-07, "loss": 0.7865, "step": 304 }, { "epoch": 0.04, "grad_norm": 7.899789333343506, "learning_rate": 7.367149758454106e-07, "loss": 0.7603, "step": 305 }, { "epoch": 0.04, "grad_norm": 8.872576713562012, "learning_rate": 7.391304347826086e-07, "loss": 0.8291, "step": 306 }, { "epoch": 0.04, "grad_norm": 19.030017852783203, "learning_rate": 7.415458937198067e-07, "loss": 0.7898, "step": 307 }, { "epoch": 0.04, "grad_norm": 9.317584991455078, "learning_rate": 7.439613526570048e-07, "loss": 0.8179, "step": 308 }, { "epoch": 0.04, "grad_norm": 8.167390823364258, "learning_rate": 7.463768115942028e-07, "loss": 0.7532, "step": 309 }, { "epoch": 0.04, "grad_norm": 8.579687118530273, "learning_rate": 7.487922705314009e-07, "loss": 0.741, "step": 310 }, { "epoch": 0.05, "grad_norm": 8.059122085571289, "learning_rate": 7.51207729468599e-07, "loss": 0.7422, "step": 311 }, { "epoch": 0.05, "grad_norm": 8.721208572387695, "learning_rate": 7.536231884057971e-07, "loss": 0.8179, "step": 312 }, { "epoch": 0.05, "grad_norm": 8.761276245117188, "learning_rate": 7.560386473429951e-07, "loss": 0.8631, "step": 313 }, { "epoch": 0.05, "grad_norm": 8.494675636291504, "learning_rate": 7.584541062801932e-07, "loss": 0.6811, "step": 314 }, { "epoch": 0.05, "grad_norm": 8.104771614074707, "learning_rate": 7.608695652173913e-07, "loss": 0.7479, "step": 315 }, { "epoch": 0.05, "grad_norm": 8.150835037231445, "learning_rate": 7.632850241545893e-07, "loss": 0.7087, "step": 316 }, { "epoch": 0.05, "grad_norm": 9.686736106872559, "learning_rate": 7.657004830917873e-07, "loss": 0.8614, "step": 317 }, { "epoch": 0.05, "grad_norm": 8.82465648651123, "learning_rate": 7.681159420289855e-07, "loss": 0.9085, "step": 318 }, { "epoch": 0.05, "grad_norm": 9.41411304473877, "learning_rate": 7.705314009661835e-07, "loss": 0.8491, "step": 319 }, { "epoch": 0.05, "grad_norm": 8.952282905578613, "learning_rate": 7.729468599033816e-07, "loss": 0.6986, "step": 320 }, { "epoch": 0.05, "grad_norm": 8.590654373168945, "learning_rate": 7.753623188405796e-07, "loss": 0.8338, "step": 321 }, { "epoch": 0.05, "grad_norm": 7.816141128540039, "learning_rate": 7.777777777777778e-07, "loss": 0.7203, "step": 322 }, { "epoch": 0.05, "grad_norm": 8.17349624633789, "learning_rate": 7.801932367149758e-07, "loss": 0.716, "step": 323 }, { "epoch": 0.05, "grad_norm": 8.927626609802246, "learning_rate": 7.826086956521739e-07, "loss": 0.8551, "step": 324 }, { "epoch": 0.05, "grad_norm": 8.776883125305176, "learning_rate": 7.85024154589372e-07, "loss": 0.8123, "step": 325 }, { "epoch": 0.05, "grad_norm": 8.268925666809082, "learning_rate": 7.874396135265701e-07, "loss": 0.7861, "step": 326 }, { "epoch": 0.05, "grad_norm": 9.026747703552246, "learning_rate": 7.89855072463768e-07, "loss": 0.8599, "step": 327 }, { "epoch": 0.05, "grad_norm": 8.497268676757812, "learning_rate": 7.922705314009661e-07, "loss": 0.7615, "step": 328 }, { "epoch": 0.05, "grad_norm": 8.709245681762695, "learning_rate": 7.946859903381642e-07, "loss": 0.7736, "step": 329 }, { "epoch": 0.05, "grad_norm": 8.085322380065918, "learning_rate": 7.971014492753623e-07, "loss": 0.7308, "step": 330 }, { "epoch": 0.05, "grad_norm": 8.01878547668457, "learning_rate": 7.995169082125603e-07, "loss": 0.7533, "step": 331 }, { "epoch": 0.05, "grad_norm": 7.387136936187744, "learning_rate": 8.019323671497584e-07, "loss": 0.7711, "step": 332 }, { "epoch": 0.05, "grad_norm": 9.318050384521484, "learning_rate": 8.043478260869565e-07, "loss": 0.8357, "step": 333 }, { "epoch": 0.05, "grad_norm": 8.809951782226562, "learning_rate": 8.067632850241546e-07, "loss": 0.7557, "step": 334 }, { "epoch": 0.05, "grad_norm": 7.254526138305664, "learning_rate": 8.091787439613526e-07, "loss": 0.7489, "step": 335 }, { "epoch": 0.05, "grad_norm": 8.43128490447998, "learning_rate": 8.115942028985508e-07, "loss": 0.7696, "step": 336 }, { "epoch": 0.05, "grad_norm": 8.538651466369629, "learning_rate": 8.140096618357487e-07, "loss": 0.7356, "step": 337 }, { "epoch": 0.05, "grad_norm": 8.042862892150879, "learning_rate": 8.164251207729468e-07, "loss": 0.7209, "step": 338 }, { "epoch": 0.05, "grad_norm": 9.008866310119629, "learning_rate": 8.188405797101448e-07, "loss": 0.7263, "step": 339 }, { "epoch": 0.05, "grad_norm": 7.886390209197998, "learning_rate": 8.21256038647343e-07, "loss": 0.8779, "step": 340 }, { "epoch": 0.05, "grad_norm": 9.052508354187012, "learning_rate": 8.23671497584541e-07, "loss": 0.8886, "step": 341 }, { "epoch": 0.05, "grad_norm": 8.273747444152832, "learning_rate": 8.260869565217391e-07, "loss": 0.7751, "step": 342 }, { "epoch": 0.05, "grad_norm": 9.420853614807129, "learning_rate": 8.285024154589371e-07, "loss": 0.9547, "step": 343 }, { "epoch": 0.05, "grad_norm": 8.513460159301758, "learning_rate": 8.309178743961353e-07, "loss": 0.8111, "step": 344 }, { "epoch": 0.05, "grad_norm": 7.752247333526611, "learning_rate": 8.333333333333333e-07, "loss": 0.7748, "step": 345 }, { "epoch": 0.05, "grad_norm": 8.525763511657715, "learning_rate": 8.357487922705314e-07, "loss": 0.8008, "step": 346 }, { "epoch": 0.05, "grad_norm": 8.899885177612305, "learning_rate": 8.381642512077294e-07, "loss": 0.7695, "step": 347 }, { "epoch": 0.05, "grad_norm": 8.3970365524292, "learning_rate": 8.405797101449275e-07, "loss": 0.7364, "step": 348 }, { "epoch": 0.05, "grad_norm": 10.868767738342285, "learning_rate": 8.429951690821255e-07, "loss": 0.8562, "step": 349 }, { "epoch": 0.05, "grad_norm": 8.071277618408203, "learning_rate": 8.454106280193236e-07, "loss": 0.7995, "step": 350 }, { "epoch": 0.05, "grad_norm": 7.653231143951416, "learning_rate": 8.478260869565217e-07, "loss": 0.7094, "step": 351 }, { "epoch": 0.05, "grad_norm": 8.903921127319336, "learning_rate": 8.502415458937198e-07, "loss": 0.8905, "step": 352 }, { "epoch": 0.05, "grad_norm": 8.811760902404785, "learning_rate": 8.526570048309178e-07, "loss": 0.7324, "step": 353 }, { "epoch": 0.05, "grad_norm": 7.472379207611084, "learning_rate": 8.550724637681159e-07, "loss": 0.7283, "step": 354 }, { "epoch": 0.05, "grad_norm": 7.909978866577148, "learning_rate": 8.57487922705314e-07, "loss": 0.8101, "step": 355 }, { "epoch": 0.05, "grad_norm": 8.699928283691406, "learning_rate": 8.599033816425121e-07, "loss": 0.7576, "step": 356 }, { "epoch": 0.05, "grad_norm": 8.20464038848877, "learning_rate": 8.623188405797101e-07, "loss": 0.7895, "step": 357 }, { "epoch": 0.05, "grad_norm": 9.446338653564453, "learning_rate": 8.647342995169082e-07, "loss": 0.7442, "step": 358 }, { "epoch": 0.05, "grad_norm": 7.428435802459717, "learning_rate": 8.671497584541062e-07, "loss": 0.7196, "step": 359 }, { "epoch": 0.05, "grad_norm": 8.312899589538574, "learning_rate": 8.695652173913043e-07, "loss": 0.783, "step": 360 }, { "epoch": 0.05, "grad_norm": 7.789972305297852, "learning_rate": 8.719806763285023e-07, "loss": 0.8796, "step": 361 }, { "epoch": 0.05, "grad_norm": 8.151762008666992, "learning_rate": 8.743961352657005e-07, "loss": 0.7217, "step": 362 }, { "epoch": 0.05, "grad_norm": 8.963635444641113, "learning_rate": 8.768115942028985e-07, "loss": 0.7986, "step": 363 }, { "epoch": 0.05, "grad_norm": 8.651167869567871, "learning_rate": 8.792270531400966e-07, "loss": 0.7428, "step": 364 }, { "epoch": 0.05, "grad_norm": 8.809622764587402, "learning_rate": 8.816425120772947e-07, "loss": 0.8273, "step": 365 }, { "epoch": 0.05, "grad_norm": 9.235517501831055, "learning_rate": 8.840579710144928e-07, "loss": 0.7829, "step": 366 }, { "epoch": 0.05, "grad_norm": 7.57908821105957, "learning_rate": 8.864734299516908e-07, "loss": 0.8388, "step": 367 }, { "epoch": 0.05, "grad_norm": 8.325371742248535, "learning_rate": 8.888888888888888e-07, "loss": 0.767, "step": 368 }, { "epoch": 0.05, "grad_norm": 8.383179664611816, "learning_rate": 8.913043478260869e-07, "loss": 0.7317, "step": 369 }, { "epoch": 0.05, "grad_norm": 7.931764602661133, "learning_rate": 8.93719806763285e-07, "loss": 0.7764, "step": 370 }, { "epoch": 0.05, "grad_norm": 8.12639045715332, "learning_rate": 8.96135265700483e-07, "loss": 0.846, "step": 371 }, { "epoch": 0.05, "grad_norm": 8.713008880615234, "learning_rate": 8.985507246376811e-07, "loss": 0.8713, "step": 372 }, { "epoch": 0.05, "grad_norm": 8.319939613342285, "learning_rate": 9.009661835748792e-07, "loss": 0.8405, "step": 373 }, { "epoch": 0.05, "grad_norm": 8.049751281738281, "learning_rate": 9.033816425120773e-07, "loss": 0.7186, "step": 374 }, { "epoch": 0.05, "grad_norm": 7.952052593231201, "learning_rate": 9.057971014492753e-07, "loss": 0.7353, "step": 375 }, { "epoch": 0.05, "grad_norm": 9.397566795349121, "learning_rate": 9.082125603864735e-07, "loss": 0.9053, "step": 376 }, { "epoch": 0.05, "grad_norm": 8.391581535339355, "learning_rate": 9.106280193236715e-07, "loss": 0.6892, "step": 377 }, { "epoch": 0.05, "grad_norm": 7.2003350257873535, "learning_rate": 9.130434782608695e-07, "loss": 0.7308, "step": 378 }, { "epoch": 0.05, "grad_norm": 8.717453956604004, "learning_rate": 9.154589371980675e-07, "loss": 0.8644, "step": 379 }, { "epoch": 0.06, "grad_norm": 8.499171257019043, "learning_rate": 9.178743961352657e-07, "loss": 0.7689, "step": 380 }, { "epoch": 0.06, "grad_norm": 8.13824462890625, "learning_rate": 9.202898550724637e-07, "loss": 0.7588, "step": 381 }, { "epoch": 0.06, "grad_norm": 8.33141803741455, "learning_rate": 9.227053140096618e-07, "loss": 0.91, "step": 382 }, { "epoch": 0.06, "grad_norm": 7.944968223571777, "learning_rate": 9.251207729468598e-07, "loss": 0.7478, "step": 383 }, { "epoch": 0.06, "grad_norm": 8.532755851745605, "learning_rate": 9.27536231884058e-07, "loss": 0.7712, "step": 384 }, { "epoch": 0.06, "grad_norm": 8.389952659606934, "learning_rate": 9.29951690821256e-07, "loss": 0.8252, "step": 385 }, { "epoch": 0.06, "grad_norm": 7.9497785568237305, "learning_rate": 9.323671497584541e-07, "loss": 0.8252, "step": 386 }, { "epoch": 0.06, "grad_norm": 8.506481170654297, "learning_rate": 9.347826086956522e-07, "loss": 0.7545, "step": 387 }, { "epoch": 0.06, "grad_norm": 7.519574165344238, "learning_rate": 9.371980676328503e-07, "loss": 0.7113, "step": 388 }, { "epoch": 0.06, "grad_norm": 8.484254837036133, "learning_rate": 9.396135265700482e-07, "loss": 0.7632, "step": 389 }, { "epoch": 0.06, "grad_norm": 7.287871837615967, "learning_rate": 9.420289855072463e-07, "loss": 0.7467, "step": 390 }, { "epoch": 0.06, "grad_norm": 8.094898223876953, "learning_rate": 9.444444444444444e-07, "loss": 0.7717, "step": 391 }, { "epoch": 0.06, "grad_norm": 8.271889686584473, "learning_rate": 9.468599033816425e-07, "loss": 0.7933, "step": 392 }, { "epoch": 0.06, "grad_norm": 8.60461139678955, "learning_rate": 9.492753623188405e-07, "loss": 0.7858, "step": 393 }, { "epoch": 0.06, "grad_norm": 8.141101837158203, "learning_rate": 9.516908212560386e-07, "loss": 0.8659, "step": 394 }, { "epoch": 0.06, "grad_norm": 8.006037712097168, "learning_rate": 9.541062801932366e-07, "loss": 0.777, "step": 395 }, { "epoch": 0.06, "grad_norm": 7.972829341888428, "learning_rate": 9.565217391304349e-07, "loss": 0.8062, "step": 396 }, { "epoch": 0.06, "grad_norm": 7.443075180053711, "learning_rate": 9.58937198067633e-07, "loss": 0.7261, "step": 397 }, { "epoch": 0.06, "grad_norm": 6.8816938400268555, "learning_rate": 9.61352657004831e-07, "loss": 0.7443, "step": 398 }, { "epoch": 0.06, "grad_norm": 7.797282695770264, "learning_rate": 9.637681159420288e-07, "loss": 0.8108, "step": 399 }, { "epoch": 0.06, "grad_norm": 8.157563209533691, "learning_rate": 9.66183574879227e-07, "loss": 0.6855, "step": 400 }, { "epoch": 0.06, "grad_norm": 9.4524564743042, "learning_rate": 9.685990338164251e-07, "loss": 0.8176, "step": 401 }, { "epoch": 0.06, "grad_norm": 7.490127086639404, "learning_rate": 9.710144927536232e-07, "loss": 0.7413, "step": 402 }, { "epoch": 0.06, "grad_norm": 8.020041465759277, "learning_rate": 9.734299516908212e-07, "loss": 0.7366, "step": 403 }, { "epoch": 0.06, "grad_norm": 7.954964637756348, "learning_rate": 9.758454106280193e-07, "loss": 0.7379, "step": 404 }, { "epoch": 0.06, "grad_norm": 8.513673782348633, "learning_rate": 9.782608695652173e-07, "loss": 0.7858, "step": 405 }, { "epoch": 0.06, "grad_norm": 7.9393110275268555, "learning_rate": 9.806763285024154e-07, "loss": 0.8397, "step": 406 }, { "epoch": 0.06, "grad_norm": 8.755376815795898, "learning_rate": 9.830917874396136e-07, "loss": 0.7432, "step": 407 }, { "epoch": 0.06, "grad_norm": 7.526410102844238, "learning_rate": 9.855072463768117e-07, "loss": 0.7694, "step": 408 }, { "epoch": 0.06, "grad_norm": 7.9460768699646, "learning_rate": 9.879227053140095e-07, "loss": 0.8322, "step": 409 }, { "epoch": 0.06, "grad_norm": 7.563000679016113, "learning_rate": 9.903381642512075e-07, "loss": 0.8549, "step": 410 }, { "epoch": 0.06, "grad_norm": 7.7790207862854, "learning_rate": 9.927536231884058e-07, "loss": 0.8343, "step": 411 }, { "epoch": 0.06, "grad_norm": 8.624224662780762, "learning_rate": 9.951690821256039e-07, "loss": 0.758, "step": 412 }, { "epoch": 0.06, "grad_norm": 8.341802597045898, "learning_rate": 9.97584541062802e-07, "loss": 0.7439, "step": 413 }, { "epoch": 0.06, "grad_norm": 8.302218437194824, "learning_rate": 1e-06, "loss": 0.7138, "step": 414 }, { "epoch": 0.06, "grad_norm": 8.391666412353516, "learning_rate": 1.002415458937198e-06, "loss": 0.7547, "step": 415 }, { "epoch": 0.06, "grad_norm": 8.21535873413086, "learning_rate": 1.004830917874396e-06, "loss": 0.8021, "step": 416 }, { "epoch": 0.06, "grad_norm": 7.437970161437988, "learning_rate": 1.007246376811594e-06, "loss": 0.8537, "step": 417 }, { "epoch": 0.06, "grad_norm": 7.444977283477783, "learning_rate": 1.0096618357487924e-06, "loss": 0.8254, "step": 418 }, { "epoch": 0.06, "grad_norm": 8.374068260192871, "learning_rate": 1.0120772946859904e-06, "loss": 0.7777, "step": 419 }, { "epoch": 0.06, "grad_norm": 8.710249900817871, "learning_rate": 1.0144927536231885e-06, "loss": 0.8059, "step": 420 }, { "epoch": 0.06, "grad_norm": 7.985867023468018, "learning_rate": 1.0169082125603865e-06, "loss": 0.6851, "step": 421 }, { "epoch": 0.06, "grad_norm": 8.228849411010742, "learning_rate": 1.0193236714975846e-06, "loss": 0.749, "step": 422 }, { "epoch": 0.06, "grad_norm": 8.238436698913574, "learning_rate": 1.0217391304347826e-06, "loss": 0.7409, "step": 423 }, { "epoch": 0.06, "grad_norm": 8.436107635498047, "learning_rate": 1.0241545893719807e-06, "loss": 0.8075, "step": 424 }, { "epoch": 0.06, "grad_norm": 8.231524467468262, "learning_rate": 1.0265700483091785e-06, "loss": 0.7998, "step": 425 }, { "epoch": 0.06, "grad_norm": 7.590820789337158, "learning_rate": 1.0289855072463767e-06, "loss": 0.7448, "step": 426 }, { "epoch": 0.06, "grad_norm": 9.044777870178223, "learning_rate": 1.0314009661835748e-06, "loss": 0.7634, "step": 427 }, { "epoch": 0.06, "grad_norm": 7.60164213180542, "learning_rate": 1.0338164251207728e-06, "loss": 0.7522, "step": 428 }, { "epoch": 0.06, "grad_norm": 7.6475114822387695, "learning_rate": 1.036231884057971e-06, "loss": 0.7638, "step": 429 }, { "epoch": 0.06, "grad_norm": 7.594768524169922, "learning_rate": 1.038647342995169e-06, "loss": 0.7992, "step": 430 }, { "epoch": 0.06, "grad_norm": 7.99544620513916, "learning_rate": 1.041062801932367e-06, "loss": 0.841, "step": 431 }, { "epoch": 0.06, "grad_norm": 8.007787704467773, "learning_rate": 1.043478260869565e-06, "loss": 0.6681, "step": 432 }, { "epoch": 0.06, "grad_norm": 8.175726890563965, "learning_rate": 1.0458937198067633e-06, "loss": 0.7689, "step": 433 }, { "epoch": 0.06, "grad_norm": 8.0227689743042, "learning_rate": 1.0483091787439614e-06, "loss": 0.8419, "step": 434 }, { "epoch": 0.06, "grad_norm": 7.686334133148193, "learning_rate": 1.0507246376811594e-06, "loss": 0.7333, "step": 435 }, { "epoch": 0.06, "grad_norm": 7.913011074066162, "learning_rate": 1.0531400966183574e-06, "loss": 0.8336, "step": 436 }, { "epoch": 0.06, "grad_norm": 7.751181125640869, "learning_rate": 1.0555555555555555e-06, "loss": 0.7493, "step": 437 }, { "epoch": 0.06, "grad_norm": 9.220331192016602, "learning_rate": 1.0579710144927535e-06, "loss": 0.805, "step": 438 }, { "epoch": 0.06, "grad_norm": 7.85330057144165, "learning_rate": 1.0603864734299516e-06, "loss": 0.8026, "step": 439 }, { "epoch": 0.06, "grad_norm": 8.668082237243652, "learning_rate": 1.0628019323671499e-06, "loss": 0.8338, "step": 440 }, { "epoch": 0.06, "grad_norm": 8.38031005859375, "learning_rate": 1.065217391304348e-06, "loss": 0.8322, "step": 441 }, { "epoch": 0.06, "grad_norm": 7.563913822174072, "learning_rate": 1.067632850241546e-06, "loss": 0.7625, "step": 442 }, { "epoch": 0.06, "grad_norm": 8.898241996765137, "learning_rate": 1.070048309178744e-06, "loss": 0.7813, "step": 443 }, { "epoch": 0.06, "grad_norm": 7.450992584228516, "learning_rate": 1.072463768115942e-06, "loss": 0.6387, "step": 444 }, { "epoch": 0.06, "grad_norm": 7.197563648223877, "learning_rate": 1.07487922705314e-06, "loss": 0.7313, "step": 445 }, { "epoch": 0.06, "grad_norm": 7.701167583465576, "learning_rate": 1.077294685990338e-06, "loss": 0.8017, "step": 446 }, { "epoch": 0.06, "grad_norm": 8.239095687866211, "learning_rate": 1.0797101449275362e-06, "loss": 0.6969, "step": 447 }, { "epoch": 0.07, "grad_norm": 7.449378967285156, "learning_rate": 1.0821256038647342e-06, "loss": 0.7494, "step": 448 }, { "epoch": 0.07, "grad_norm": 8.238497734069824, "learning_rate": 1.0845410628019323e-06, "loss": 0.7046, "step": 449 }, { "epoch": 0.07, "grad_norm": 7.730191230773926, "learning_rate": 1.0869565217391303e-06, "loss": 0.7446, "step": 450 }, { "epoch": 0.07, "grad_norm": 8.578824043273926, "learning_rate": 1.0893719806763284e-06, "loss": 0.7279, "step": 451 }, { "epoch": 0.07, "grad_norm": 8.07958698272705, "learning_rate": 1.0917874396135264e-06, "loss": 0.7742, "step": 452 }, { "epoch": 0.07, "grad_norm": 8.864911079406738, "learning_rate": 1.0942028985507245e-06, "loss": 0.7962, "step": 453 }, { "epoch": 0.07, "grad_norm": 7.943617343902588, "learning_rate": 1.0966183574879227e-06, "loss": 0.7197, "step": 454 }, { "epoch": 0.07, "grad_norm": 8.305096626281738, "learning_rate": 1.0990338164251208e-06, "loss": 0.867, "step": 455 }, { "epoch": 0.07, "grad_norm": 7.5069050788879395, "learning_rate": 1.1014492753623188e-06, "loss": 0.7473, "step": 456 }, { "epoch": 0.07, "grad_norm": 7.5866594314575195, "learning_rate": 1.1038647342995169e-06, "loss": 0.7699, "step": 457 }, { "epoch": 0.07, "grad_norm": 8.012004852294922, "learning_rate": 1.106280193236715e-06, "loss": 0.7491, "step": 458 }, { "epoch": 0.07, "grad_norm": 8.038968086242676, "learning_rate": 1.108695652173913e-06, "loss": 0.8007, "step": 459 }, { "epoch": 0.07, "grad_norm": 8.965154647827148, "learning_rate": 1.111111111111111e-06, "loss": 0.8587, "step": 460 }, { "epoch": 0.07, "grad_norm": 9.106990814208984, "learning_rate": 1.113526570048309e-06, "loss": 0.7698, "step": 461 }, { "epoch": 0.07, "grad_norm": 8.772173881530762, "learning_rate": 1.1159420289855073e-06, "loss": 0.8869, "step": 462 }, { "epoch": 0.07, "grad_norm": 9.330313682556152, "learning_rate": 1.1183574879227054e-06, "loss": 1.0228, "step": 463 }, { "epoch": 0.07, "grad_norm": 8.727599143981934, "learning_rate": 1.1207729468599034e-06, "loss": 0.8369, "step": 464 }, { "epoch": 0.07, "grad_norm": 8.023737907409668, "learning_rate": 1.1231884057971015e-06, "loss": 0.7928, "step": 465 }, { "epoch": 0.07, "grad_norm": 7.939091205596924, "learning_rate": 1.1256038647342993e-06, "loss": 0.7624, "step": 466 }, { "epoch": 0.07, "grad_norm": 8.309566497802734, "learning_rate": 1.1280193236714974e-06, "loss": 0.7664, "step": 467 }, { "epoch": 0.07, "grad_norm": 9.097074508666992, "learning_rate": 1.1304347826086954e-06, "loss": 0.7168, "step": 468 }, { "epoch": 0.07, "grad_norm": 7.722247123718262, "learning_rate": 1.1328502415458937e-06, "loss": 0.719, "step": 469 }, { "epoch": 0.07, "grad_norm": 9.23708724975586, "learning_rate": 1.1352657004830917e-06, "loss": 0.8279, "step": 470 }, { "epoch": 0.07, "grad_norm": 8.550832748413086, "learning_rate": 1.1376811594202898e-06, "loss": 0.7139, "step": 471 }, { "epoch": 0.07, "grad_norm": 7.485679626464844, "learning_rate": 1.1400966183574878e-06, "loss": 0.7597, "step": 472 }, { "epoch": 0.07, "grad_norm": 7.947446346282959, "learning_rate": 1.1425120772946859e-06, "loss": 0.7923, "step": 473 }, { "epoch": 0.07, "grad_norm": 7.087867736816406, "learning_rate": 1.144927536231884e-06, "loss": 0.7509, "step": 474 }, { "epoch": 0.07, "grad_norm": 8.130386352539062, "learning_rate": 1.147342995169082e-06, "loss": 0.8298, "step": 475 }, { "epoch": 0.07, "grad_norm": 7.775681018829346, "learning_rate": 1.1497584541062802e-06, "loss": 0.6882, "step": 476 }, { "epoch": 0.07, "grad_norm": 7.740961074829102, "learning_rate": 1.1521739130434783e-06, "loss": 0.773, "step": 477 }, { "epoch": 0.07, "grad_norm": 8.66746997833252, "learning_rate": 1.1545893719806763e-06, "loss": 0.698, "step": 478 }, { "epoch": 0.07, "grad_norm": 7.108086109161377, "learning_rate": 1.1570048309178744e-06, "loss": 0.8066, "step": 479 }, { "epoch": 0.07, "grad_norm": 8.02619743347168, "learning_rate": 1.1594202898550724e-06, "loss": 0.7781, "step": 480 }, { "epoch": 0.07, "grad_norm": 8.342351913452148, "learning_rate": 1.1618357487922705e-06, "loss": 0.77, "step": 481 }, { "epoch": 0.07, "grad_norm": 8.255386352539062, "learning_rate": 1.1642512077294685e-06, "loss": 0.7922, "step": 482 }, { "epoch": 0.07, "grad_norm": 7.675711154937744, "learning_rate": 1.1666666666666668e-06, "loss": 0.791, "step": 483 }, { "epoch": 0.07, "grad_norm": 8.192439079284668, "learning_rate": 1.1690821256038648e-06, "loss": 0.7665, "step": 484 }, { "epoch": 0.07, "grad_norm": 8.459661483764648, "learning_rate": 1.1714975845410629e-06, "loss": 0.7784, "step": 485 }, { "epoch": 0.07, "grad_norm": 7.5276288986206055, "learning_rate": 1.173913043478261e-06, "loss": 0.815, "step": 486 }, { "epoch": 0.07, "grad_norm": 7.378063678741455, "learning_rate": 1.1763285024154588e-06, "loss": 0.7496, "step": 487 }, { "epoch": 0.07, "grad_norm": 8.394169807434082, "learning_rate": 1.1787439613526568e-06, "loss": 0.773, "step": 488 }, { "epoch": 0.07, "grad_norm": 7.926616191864014, "learning_rate": 1.1811594202898549e-06, "loss": 0.7979, "step": 489 }, { "epoch": 0.07, "grad_norm": 8.223981857299805, "learning_rate": 1.183574879227053e-06, "loss": 0.7224, "step": 490 }, { "epoch": 0.07, "grad_norm": 7.552475929260254, "learning_rate": 1.1859903381642512e-06, "loss": 0.7517, "step": 491 }, { "epoch": 0.07, "grad_norm": 7.966248512268066, "learning_rate": 1.1884057971014492e-06, "loss": 0.8006, "step": 492 }, { "epoch": 0.07, "grad_norm": 7.98259162902832, "learning_rate": 1.1908212560386473e-06, "loss": 0.7154, "step": 493 }, { "epoch": 0.07, "grad_norm": 8.321023941040039, "learning_rate": 1.1932367149758453e-06, "loss": 0.7083, "step": 494 }, { "epoch": 0.07, "grad_norm": 8.698357582092285, "learning_rate": 1.1956521739130434e-06, "loss": 0.7035, "step": 495 }, { "epoch": 0.07, "grad_norm": 8.323665618896484, "learning_rate": 1.1980676328502414e-06, "loss": 0.789, "step": 496 }, { "epoch": 0.07, "grad_norm": 8.993846893310547, "learning_rate": 1.2004830917874395e-06, "loss": 0.7696, "step": 497 }, { "epoch": 0.07, "grad_norm": 7.7983479499816895, "learning_rate": 1.2028985507246377e-06, "loss": 0.8112, "step": 498 }, { "epoch": 0.07, "grad_norm": 7.80042028427124, "learning_rate": 1.2053140096618358e-06, "loss": 0.7422, "step": 499 }, { "epoch": 0.07, "grad_norm": 8.279308319091797, "learning_rate": 1.2077294685990338e-06, "loss": 0.8607, "step": 500 }, { "epoch": 0.07, "grad_norm": 8.143479347229004, "learning_rate": 1.2101449275362319e-06, "loss": 0.8557, "step": 501 }, { "epoch": 0.07, "grad_norm": 8.170121192932129, "learning_rate": 1.21256038647343e-06, "loss": 0.8442, "step": 502 }, { "epoch": 0.07, "grad_norm": 7.1004815101623535, "learning_rate": 1.214975845410628e-06, "loss": 0.8729, "step": 503 }, { "epoch": 0.07, "grad_norm": 7.846823692321777, "learning_rate": 1.217391304347826e-06, "loss": 0.7874, "step": 504 }, { "epoch": 0.07, "grad_norm": 7.886445999145508, "learning_rate": 1.2198067632850243e-06, "loss": 0.7541, "step": 505 }, { "epoch": 0.07, "grad_norm": 7.759784698486328, "learning_rate": 1.2222222222222223e-06, "loss": 0.8236, "step": 506 }, { "epoch": 0.07, "grad_norm": 7.6482720375061035, "learning_rate": 1.2246376811594204e-06, "loss": 0.8808, "step": 507 }, { "epoch": 0.07, "grad_norm": 12.594780921936035, "learning_rate": 1.2270531400966182e-06, "loss": 0.8541, "step": 508 }, { "epoch": 0.07, "grad_norm": 8.353472709655762, "learning_rate": 1.2294685990338163e-06, "loss": 0.9004, "step": 509 }, { "epoch": 0.07, "grad_norm": 8.144922256469727, "learning_rate": 1.2318840579710143e-06, "loss": 0.7738, "step": 510 }, { "epoch": 0.07, "grad_norm": 9.693279266357422, "learning_rate": 1.2342995169082124e-06, "loss": 0.8266, "step": 511 }, { "epoch": 0.07, "grad_norm": 8.418000221252441, "learning_rate": 1.2367149758454104e-06, "loss": 0.8395, "step": 512 }, { "epoch": 0.07, "grad_norm": 8.044658660888672, "learning_rate": 1.2391304347826087e-06, "loss": 0.7703, "step": 513 }, { "epoch": 0.07, "grad_norm": 7.294851303100586, "learning_rate": 1.2415458937198067e-06, "loss": 0.7618, "step": 514 }, { "epoch": 0.07, "grad_norm": 7.818904876708984, "learning_rate": 1.2439613526570048e-06, "loss": 0.7087, "step": 515 }, { "epoch": 0.07, "grad_norm": 7.518894672393799, "learning_rate": 1.2463768115942028e-06, "loss": 0.8119, "step": 516 }, { "epoch": 0.08, "grad_norm": 8.241599082946777, "learning_rate": 1.2487922705314009e-06, "loss": 0.8364, "step": 517 }, { "epoch": 0.08, "grad_norm": 7.857913494110107, "learning_rate": 1.251207729468599e-06, "loss": 0.8031, "step": 518 }, { "epoch": 0.08, "grad_norm": 8.202010154724121, "learning_rate": 1.253623188405797e-06, "loss": 0.8803, "step": 519 }, { "epoch": 0.08, "grad_norm": 7.085601329803467, "learning_rate": 1.2560386473429952e-06, "loss": 0.7354, "step": 520 }, { "epoch": 0.08, "grad_norm": 7.756953239440918, "learning_rate": 1.2584541062801933e-06, "loss": 0.8557, "step": 521 }, { "epoch": 0.08, "grad_norm": 7.781080722808838, "learning_rate": 1.2608695652173913e-06, "loss": 0.8193, "step": 522 }, { "epoch": 0.08, "grad_norm": 7.618258476257324, "learning_rate": 1.2632850241545894e-06, "loss": 0.7615, "step": 523 }, { "epoch": 0.08, "grad_norm": 7.968803405761719, "learning_rate": 1.2657004830917874e-06, "loss": 0.822, "step": 524 }, { "epoch": 0.08, "grad_norm": 8.66061782836914, "learning_rate": 1.2681159420289855e-06, "loss": 0.7468, "step": 525 }, { "epoch": 0.08, "grad_norm": 7.642910957336426, "learning_rate": 1.2705314009661835e-06, "loss": 0.7219, "step": 526 }, { "epoch": 0.08, "grad_norm": 8.517637252807617, "learning_rate": 1.2729468599033818e-06, "loss": 0.8268, "step": 527 }, { "epoch": 0.08, "grad_norm": 8.46310043334961, "learning_rate": 1.2753623188405796e-06, "loss": 0.8112, "step": 528 }, { "epoch": 0.08, "grad_norm": 8.475865364074707, "learning_rate": 1.2777777777777777e-06, "loss": 0.8062, "step": 529 }, { "epoch": 0.08, "grad_norm": 8.027172088623047, "learning_rate": 1.2801932367149757e-06, "loss": 0.7545, "step": 530 }, { "epoch": 0.08, "grad_norm": 7.7695112228393555, "learning_rate": 1.2826086956521738e-06, "loss": 0.7162, "step": 531 }, { "epoch": 0.08, "grad_norm": 8.120187759399414, "learning_rate": 1.2850241545893718e-06, "loss": 0.8581, "step": 532 }, { "epoch": 0.08, "grad_norm": 7.971390724182129, "learning_rate": 1.2874396135265699e-06, "loss": 0.745, "step": 533 }, { "epoch": 0.08, "grad_norm": 7.682831764221191, "learning_rate": 1.2898550724637681e-06, "loss": 0.6475, "step": 534 }, { "epoch": 0.08, "grad_norm": 7.5248332023620605, "learning_rate": 1.2922705314009662e-06, "loss": 0.7643, "step": 535 }, { "epoch": 0.08, "grad_norm": 8.140870094299316, "learning_rate": 1.2946859903381642e-06, "loss": 0.7242, "step": 536 }, { "epoch": 0.08, "grad_norm": 8.24442195892334, "learning_rate": 1.2971014492753623e-06, "loss": 0.7799, "step": 537 }, { "epoch": 0.08, "grad_norm": 8.618619918823242, "learning_rate": 1.2995169082125603e-06, "loss": 0.8299, "step": 538 }, { "epoch": 0.08, "grad_norm": 8.022990226745605, "learning_rate": 1.3019323671497584e-06, "loss": 0.7491, "step": 539 }, { "epoch": 0.08, "grad_norm": 8.273419380187988, "learning_rate": 1.3043478260869564e-06, "loss": 0.7352, "step": 540 }, { "epoch": 0.08, "grad_norm": 8.113691329956055, "learning_rate": 1.3067632850241545e-06, "loss": 0.7083, "step": 541 }, { "epoch": 0.08, "grad_norm": 7.748983860015869, "learning_rate": 1.3091787439613527e-06, "loss": 0.8205, "step": 542 }, { "epoch": 0.08, "grad_norm": 7.622618198394775, "learning_rate": 1.3115942028985508e-06, "loss": 0.763, "step": 543 }, { "epoch": 0.08, "grad_norm": 7.712653160095215, "learning_rate": 1.3140096618357488e-06, "loss": 0.7706, "step": 544 }, { "epoch": 0.08, "grad_norm": 8.667501449584961, "learning_rate": 1.3164251207729469e-06, "loss": 0.7484, "step": 545 }, { "epoch": 0.08, "grad_norm": 7.632143974304199, "learning_rate": 1.318840579710145e-06, "loss": 0.782, "step": 546 }, { "epoch": 0.08, "grad_norm": 7.395961761474609, "learning_rate": 1.321256038647343e-06, "loss": 0.7279, "step": 547 }, { "epoch": 0.08, "grad_norm": 7.6551384925842285, "learning_rate": 1.323671497584541e-06, "loss": 0.778, "step": 548 }, { "epoch": 0.08, "grad_norm": 7.8897857666015625, "learning_rate": 1.326086956521739e-06, "loss": 0.8749, "step": 549 }, { "epoch": 0.08, "grad_norm": 8.681697845458984, "learning_rate": 1.328502415458937e-06, "loss": 0.8495, "step": 550 }, { "epoch": 0.08, "grad_norm": 7.5176849365234375, "learning_rate": 1.3309178743961352e-06, "loss": 0.7126, "step": 551 }, { "epoch": 0.08, "grad_norm": 7.636926174163818, "learning_rate": 1.3333333333333332e-06, "loss": 0.758, "step": 552 }, { "epoch": 0.08, "grad_norm": 7.425988674163818, "learning_rate": 1.3357487922705313e-06, "loss": 0.7722, "step": 553 }, { "epoch": 0.08, "grad_norm": 7.417726993560791, "learning_rate": 1.3381642512077293e-06, "loss": 0.812, "step": 554 }, { "epoch": 0.08, "grad_norm": 8.119747161865234, "learning_rate": 1.3405797101449273e-06, "loss": 0.7703, "step": 555 }, { "epoch": 0.08, "grad_norm": 6.682091236114502, "learning_rate": 1.3429951690821256e-06, "loss": 0.7125, "step": 556 }, { "epoch": 0.08, "grad_norm": 7.913062572479248, "learning_rate": 1.3454106280193237e-06, "loss": 0.7605, "step": 557 }, { "epoch": 0.08, "grad_norm": 7.995806694030762, "learning_rate": 1.3478260869565217e-06, "loss": 0.6997, "step": 558 }, { "epoch": 0.08, "grad_norm": 7.5193071365356445, "learning_rate": 1.3502415458937198e-06, "loss": 0.7912, "step": 559 }, { "epoch": 0.08, "grad_norm": 6.45506477355957, "learning_rate": 1.3526570048309178e-06, "loss": 0.7723, "step": 560 }, { "epoch": 0.08, "grad_norm": 7.068336486816406, "learning_rate": 1.3550724637681159e-06, "loss": 0.7837, "step": 561 }, { "epoch": 0.08, "grad_norm": 8.316934585571289, "learning_rate": 1.357487922705314e-06, "loss": 0.8254, "step": 562 }, { "epoch": 0.08, "grad_norm": 7.603231430053711, "learning_rate": 1.3599033816425122e-06, "loss": 0.754, "step": 563 }, { "epoch": 0.08, "grad_norm": 7.6187896728515625, "learning_rate": 1.3623188405797102e-06, "loss": 0.8093, "step": 564 }, { "epoch": 0.08, "grad_norm": 8.077376365661621, "learning_rate": 1.3647342995169083e-06, "loss": 0.8625, "step": 565 }, { "epoch": 0.08, "grad_norm": 7.924118995666504, "learning_rate": 1.3671497584541063e-06, "loss": 0.8194, "step": 566 }, { "epoch": 0.08, "grad_norm": 7.643595218658447, "learning_rate": 1.3695652173913044e-06, "loss": 0.8402, "step": 567 }, { "epoch": 0.08, "grad_norm": 7.369141101837158, "learning_rate": 1.3719806763285024e-06, "loss": 0.7309, "step": 568 }, { "epoch": 0.08, "grad_norm": 7.523855209350586, "learning_rate": 1.3743961352657005e-06, "loss": 0.8299, "step": 569 }, { "epoch": 0.08, "grad_norm": 7.997917652130127, "learning_rate": 1.3768115942028983e-06, "loss": 0.709, "step": 570 }, { "epoch": 0.08, "grad_norm": 8.564833641052246, "learning_rate": 1.3792270531400965e-06, "loss": 0.7289, "step": 571 }, { "epoch": 0.08, "grad_norm": 7.917374610900879, "learning_rate": 1.3816425120772946e-06, "loss": 0.7562, "step": 572 }, { "epoch": 0.08, "grad_norm": 8.074695587158203, "learning_rate": 1.3840579710144926e-06, "loss": 0.7001, "step": 573 }, { "epoch": 0.08, "grad_norm": 8.304718971252441, "learning_rate": 1.3864734299516907e-06, "loss": 0.7823, "step": 574 }, { "epoch": 0.08, "grad_norm": 8.429527282714844, "learning_rate": 1.3888888888888887e-06, "loss": 0.8595, "step": 575 }, { "epoch": 0.08, "grad_norm": 7.3007097244262695, "learning_rate": 1.3913043478260868e-06, "loss": 0.7485, "step": 576 }, { "epoch": 0.08, "grad_norm": 7.27805757522583, "learning_rate": 1.3937198067632848e-06, "loss": 0.7453, "step": 577 }, { "epoch": 0.08, "grad_norm": 8.303080558776855, "learning_rate": 1.396135265700483e-06, "loss": 0.862, "step": 578 }, { "epoch": 0.08, "grad_norm": 6.889773368835449, "learning_rate": 1.3985507246376811e-06, "loss": 0.6833, "step": 579 }, { "epoch": 0.08, "grad_norm": 7.026730537414551, "learning_rate": 1.4009661835748792e-06, "loss": 0.7916, "step": 580 }, { "epoch": 0.08, "grad_norm": 7.111421585083008, "learning_rate": 1.4033816425120772e-06, "loss": 0.7177, "step": 581 }, { "epoch": 0.08, "grad_norm": 7.377074718475342, "learning_rate": 1.4057971014492753e-06, "loss": 0.7862, "step": 582 }, { "epoch": 0.08, "grad_norm": 7.718629360198975, "learning_rate": 1.4082125603864733e-06, "loss": 0.7549, "step": 583 }, { "epoch": 0.08, "grad_norm": 7.90329122543335, "learning_rate": 1.4106280193236714e-06, "loss": 0.7498, "step": 584 }, { "epoch": 0.08, "grad_norm": 8.144021987915039, "learning_rate": 1.4130434782608697e-06, "loss": 0.7901, "step": 585 }, { "epoch": 0.09, "grad_norm": 8.405168533325195, "learning_rate": 1.4154589371980677e-06, "loss": 0.7026, "step": 586 }, { "epoch": 0.09, "grad_norm": 7.745727062225342, "learning_rate": 1.4178743961352658e-06, "loss": 0.7513, "step": 587 }, { "epoch": 0.09, "grad_norm": 7.422682285308838, "learning_rate": 1.4202898550724638e-06, "loss": 0.7811, "step": 588 }, { "epoch": 0.09, "grad_norm": 7.599969387054443, "learning_rate": 1.4227053140096618e-06, "loss": 0.7786, "step": 589 }, { "epoch": 0.09, "grad_norm": 7.97927713394165, "learning_rate": 1.4251207729468597e-06, "loss": 0.7886, "step": 590 }, { "epoch": 0.09, "grad_norm": 7.106658458709717, "learning_rate": 1.4275362318840577e-06, "loss": 0.8126, "step": 591 }, { "epoch": 0.09, "grad_norm": 7.652963161468506, "learning_rate": 1.4299516908212558e-06, "loss": 0.7748, "step": 592 }, { "epoch": 0.09, "grad_norm": 8.204906463623047, "learning_rate": 1.432367149758454e-06, "loss": 0.7713, "step": 593 }, { "epoch": 0.09, "grad_norm": 8.075491905212402, "learning_rate": 1.434782608695652e-06, "loss": 0.7125, "step": 594 }, { "epoch": 0.09, "grad_norm": 8.19960880279541, "learning_rate": 1.4371980676328501e-06, "loss": 0.8761, "step": 595 }, { "epoch": 0.09, "grad_norm": 7.520110607147217, "learning_rate": 1.4396135265700482e-06, "loss": 0.7234, "step": 596 }, { "epoch": 0.09, "grad_norm": 7.416616439819336, "learning_rate": 1.4420289855072462e-06, "loss": 0.7665, "step": 597 }, { "epoch": 0.09, "grad_norm": 8.134371757507324, "learning_rate": 1.4444444444444443e-06, "loss": 0.8668, "step": 598 }, { "epoch": 0.09, "grad_norm": 7.303880214691162, "learning_rate": 1.4468599033816423e-06, "loss": 0.6791, "step": 599 }, { "epoch": 0.09, "grad_norm": 8.820582389831543, "learning_rate": 1.4492753623188406e-06, "loss": 0.8104, "step": 600 }, { "epoch": 0.09, "grad_norm": 7.300175189971924, "learning_rate": 1.4516908212560386e-06, "loss": 0.6883, "step": 601 }, { "epoch": 0.09, "grad_norm": 7.289283752441406, "learning_rate": 1.4541062801932367e-06, "loss": 0.8082, "step": 602 }, { "epoch": 0.09, "grad_norm": 7.418991565704346, "learning_rate": 1.4565217391304347e-06, "loss": 0.6848, "step": 603 }, { "epoch": 0.09, "grad_norm": 8.19428539276123, "learning_rate": 1.4589371980676328e-06, "loss": 0.8538, "step": 604 }, { "epoch": 0.09, "grad_norm": 7.58187198638916, "learning_rate": 1.4613526570048308e-06, "loss": 0.6714, "step": 605 }, { "epoch": 0.09, "grad_norm": 7.307172775268555, "learning_rate": 1.4637681159420289e-06, "loss": 0.6817, "step": 606 }, { "epoch": 0.09, "grad_norm": 7.492946147918701, "learning_rate": 1.4661835748792271e-06, "loss": 0.7556, "step": 607 }, { "epoch": 0.09, "grad_norm": 7.92487907409668, "learning_rate": 1.4685990338164252e-06, "loss": 0.7494, "step": 608 }, { "epoch": 0.09, "grad_norm": 8.698617935180664, "learning_rate": 1.4710144927536232e-06, "loss": 0.8511, "step": 609 }, { "epoch": 0.09, "grad_norm": 7.38287353515625, "learning_rate": 1.4734299516908213e-06, "loss": 0.7605, "step": 610 }, { "epoch": 0.09, "grad_norm": 7.126689910888672, "learning_rate": 1.4758454106280191e-06, "loss": 0.8644, "step": 611 }, { "epoch": 0.09, "grad_norm": 7.314368724822998, "learning_rate": 1.4782608695652172e-06, "loss": 0.728, "step": 612 }, { "epoch": 0.09, "grad_norm": 8.359411239624023, "learning_rate": 1.4806763285024152e-06, "loss": 0.8058, "step": 613 }, { "epoch": 0.09, "grad_norm": 7.211114406585693, "learning_rate": 1.4830917874396135e-06, "loss": 0.7517, "step": 614 }, { "epoch": 0.09, "grad_norm": 6.9591965675354, "learning_rate": 1.4855072463768115e-06, "loss": 0.8018, "step": 615 }, { "epoch": 0.09, "grad_norm": 7.008792877197266, "learning_rate": 1.4879227053140096e-06, "loss": 0.7067, "step": 616 }, { "epoch": 0.09, "grad_norm": 6.999328136444092, "learning_rate": 1.4903381642512076e-06, "loss": 0.8512, "step": 617 }, { "epoch": 0.09, "grad_norm": 7.104197025299072, "learning_rate": 1.4927536231884057e-06, "loss": 0.7592, "step": 618 }, { "epoch": 0.09, "grad_norm": 7.44596529006958, "learning_rate": 1.4951690821256037e-06, "loss": 0.7606, "step": 619 }, { "epoch": 0.09, "grad_norm": 7.739531993865967, "learning_rate": 1.4975845410628018e-06, "loss": 0.7993, "step": 620 }, { "epoch": 0.09, "grad_norm": 8.276627540588379, "learning_rate": 1.5e-06, "loss": 0.7731, "step": 621 }, { "epoch": 0.09, "grad_norm": 6.83575439453125, "learning_rate": 1.502415458937198e-06, "loss": 0.6684, "step": 622 }, { "epoch": 0.09, "grad_norm": 8.038110733032227, "learning_rate": 1.5048309178743961e-06, "loss": 0.8402, "step": 623 }, { "epoch": 0.09, "grad_norm": 7.998324871063232, "learning_rate": 1.5072463768115942e-06, "loss": 0.7627, "step": 624 }, { "epoch": 0.09, "grad_norm": 7.20554256439209, "learning_rate": 1.5096618357487922e-06, "loss": 0.7035, "step": 625 }, { "epoch": 0.09, "grad_norm": 7.524816036224365, "learning_rate": 1.5120772946859903e-06, "loss": 0.7913, "step": 626 }, { "epoch": 0.09, "grad_norm": 8.36446475982666, "learning_rate": 1.5144927536231883e-06, "loss": 0.76, "step": 627 }, { "epoch": 0.09, "grad_norm": 7.2002458572387695, "learning_rate": 1.5169082125603864e-06, "loss": 0.9109, "step": 628 }, { "epoch": 0.09, "grad_norm": 8.294758796691895, "learning_rate": 1.5193236714975846e-06, "loss": 0.699, "step": 629 }, { "epoch": 0.09, "grad_norm": 7.902064323425293, "learning_rate": 1.5217391304347827e-06, "loss": 0.8366, "step": 630 }, { "epoch": 0.09, "grad_norm": 7.574579238891602, "learning_rate": 1.5241545893719807e-06, "loss": 0.7319, "step": 631 }, { "epoch": 0.09, "grad_norm": 8.0476713180542, "learning_rate": 1.5265700483091786e-06, "loss": 0.7857, "step": 632 }, { "epoch": 0.09, "grad_norm": 6.497929573059082, "learning_rate": 1.5289855072463766e-06, "loss": 0.7091, "step": 633 }, { "epoch": 0.09, "grad_norm": 6.995360374450684, "learning_rate": 1.5314009661835747e-06, "loss": 0.8563, "step": 634 }, { "epoch": 0.09, "grad_norm": 7.046899795532227, "learning_rate": 1.5338164251207727e-06, "loss": 0.8, "step": 635 }, { "epoch": 0.09, "grad_norm": 7.868161678314209, "learning_rate": 1.536231884057971e-06, "loss": 0.6586, "step": 636 }, { "epoch": 0.09, "grad_norm": 7.762780666351318, "learning_rate": 1.538647342995169e-06, "loss": 0.7529, "step": 637 }, { "epoch": 0.09, "grad_norm": 7.3821940422058105, "learning_rate": 1.541062801932367e-06, "loss": 0.7929, "step": 638 }, { "epoch": 0.09, "grad_norm": 6.968517303466797, "learning_rate": 1.5434782608695651e-06, "loss": 0.7515, "step": 639 }, { "epoch": 0.09, "grad_norm": 7.733645915985107, "learning_rate": 1.5458937198067632e-06, "loss": 0.7112, "step": 640 }, { "epoch": 0.09, "grad_norm": 7.873400688171387, "learning_rate": 1.5483091787439612e-06, "loss": 0.8263, "step": 641 }, { "epoch": 0.09, "grad_norm": 7.698798179626465, "learning_rate": 1.5507246376811593e-06, "loss": 0.8396, "step": 642 }, { "epoch": 0.09, "grad_norm": 7.6605143547058105, "learning_rate": 1.5531400966183575e-06, "loss": 0.7753, "step": 643 }, { "epoch": 0.09, "grad_norm": 7.9188313484191895, "learning_rate": 1.5555555555555556e-06, "loss": 0.8495, "step": 644 }, { "epoch": 0.09, "grad_norm": 7.975691795349121, "learning_rate": 1.5579710144927536e-06, "loss": 0.8123, "step": 645 }, { "epoch": 0.09, "grad_norm": 7.020068168640137, "learning_rate": 1.5603864734299517e-06, "loss": 0.7388, "step": 646 }, { "epoch": 0.09, "grad_norm": 6.82908821105957, "learning_rate": 1.5628019323671497e-06, "loss": 0.7353, "step": 647 }, { "epoch": 0.09, "grad_norm": 7.716157913208008, "learning_rate": 1.5652173913043478e-06, "loss": 0.8115, "step": 648 }, { "epoch": 0.09, "grad_norm": 8.069461822509766, "learning_rate": 1.5676328502415458e-06, "loss": 0.8013, "step": 649 }, { "epoch": 0.09, "grad_norm": 7.534317493438721, "learning_rate": 1.570048309178744e-06, "loss": 0.8115, "step": 650 }, { "epoch": 0.09, "grad_norm": 7.451488971710205, "learning_rate": 1.5724637681159421e-06, "loss": 0.7523, "step": 651 }, { "epoch": 0.09, "grad_norm": 8.328240394592285, "learning_rate": 1.5748792270531402e-06, "loss": 0.9236, "step": 652 }, { "epoch": 0.09, "grad_norm": 7.054269313812256, "learning_rate": 1.577294685990338e-06, "loss": 0.8233, "step": 653 }, { "epoch": 0.09, "grad_norm": 6.838281631469727, "learning_rate": 1.579710144927536e-06, "loss": 0.8058, "step": 654 }, { "epoch": 0.1, "grad_norm": 8.135186195373535, "learning_rate": 1.5821256038647341e-06, "loss": 0.7837, "step": 655 }, { "epoch": 0.1, "grad_norm": 7.495578289031982, "learning_rate": 1.5845410628019322e-06, "loss": 0.7789, "step": 656 }, { "epoch": 0.1, "grad_norm": 7.389969825744629, "learning_rate": 1.5869565217391302e-06, "loss": 0.8367, "step": 657 }, { "epoch": 0.1, "grad_norm": 7.206267356872559, "learning_rate": 1.5893719806763285e-06, "loss": 0.8208, "step": 658 }, { "epoch": 0.1, "grad_norm": 6.699143409729004, "learning_rate": 1.5917874396135265e-06, "loss": 0.7063, "step": 659 }, { "epoch": 0.1, "grad_norm": 7.327918529510498, "learning_rate": 1.5942028985507246e-06, "loss": 0.7517, "step": 660 }, { "epoch": 0.1, "grad_norm": 6.542429447174072, "learning_rate": 1.5966183574879226e-06, "loss": 0.6719, "step": 661 }, { "epoch": 0.1, "grad_norm": 9.022879600524902, "learning_rate": 1.5990338164251207e-06, "loss": 0.7968, "step": 662 }, { "epoch": 0.1, "grad_norm": 7.1142096519470215, "learning_rate": 1.6014492753623187e-06, "loss": 0.7671, "step": 663 }, { "epoch": 0.1, "grad_norm": 8.029053688049316, "learning_rate": 1.6038647342995168e-06, "loss": 0.7684, "step": 664 }, { "epoch": 0.1, "grad_norm": 6.747452259063721, "learning_rate": 1.606280193236715e-06, "loss": 0.7392, "step": 665 }, { "epoch": 0.1, "grad_norm": 7.312702178955078, "learning_rate": 1.608695652173913e-06, "loss": 0.7473, "step": 666 }, { "epoch": 0.1, "grad_norm": 7.629759788513184, "learning_rate": 1.6111111111111111e-06, "loss": 0.7724, "step": 667 }, { "epoch": 0.1, "grad_norm": 7.515933990478516, "learning_rate": 1.6135265700483092e-06, "loss": 0.6624, "step": 668 }, { "epoch": 0.1, "grad_norm": 7.180361747741699, "learning_rate": 1.6159420289855072e-06, "loss": 0.7583, "step": 669 }, { "epoch": 0.1, "grad_norm": 7.401112079620361, "learning_rate": 1.6183574879227053e-06, "loss": 0.8645, "step": 670 }, { "epoch": 0.1, "grad_norm": 7.545596599578857, "learning_rate": 1.6207729468599033e-06, "loss": 0.8321, "step": 671 }, { "epoch": 0.1, "grad_norm": 8.462808609008789, "learning_rate": 1.6231884057971016e-06, "loss": 0.8322, "step": 672 }, { "epoch": 0.1, "grad_norm": 6.900582313537598, "learning_rate": 1.6256038647342994e-06, "loss": 0.7952, "step": 673 }, { "epoch": 0.1, "grad_norm": 7.536654949188232, "learning_rate": 1.6280193236714975e-06, "loss": 0.7354, "step": 674 }, { "epoch": 0.1, "grad_norm": 7.303921699523926, "learning_rate": 1.6304347826086955e-06, "loss": 0.7607, "step": 675 }, { "epoch": 0.1, "grad_norm": 7.611324310302734, "learning_rate": 1.6328502415458936e-06, "loss": 0.7438, "step": 676 }, { "epoch": 0.1, "grad_norm": 7.628655910491943, "learning_rate": 1.6352657004830916e-06, "loss": 0.8544, "step": 677 }, { "epoch": 0.1, "grad_norm": 7.026223659515381, "learning_rate": 1.6376811594202897e-06, "loss": 0.7786, "step": 678 }, { "epoch": 0.1, "grad_norm": 8.211602210998535, "learning_rate": 1.6400966183574877e-06, "loss": 0.8678, "step": 679 }, { "epoch": 0.1, "grad_norm": 7.053993225097656, "learning_rate": 1.642512077294686e-06, "loss": 0.8413, "step": 680 }, { "epoch": 0.1, "grad_norm": 7.485652446746826, "learning_rate": 1.644927536231884e-06, "loss": 0.8119, "step": 681 }, { "epoch": 0.1, "grad_norm": 7.407765865325928, "learning_rate": 1.647342995169082e-06, "loss": 0.8688, "step": 682 }, { "epoch": 0.1, "grad_norm": 7.515071392059326, "learning_rate": 1.6497584541062801e-06, "loss": 0.8002, "step": 683 }, { "epoch": 0.1, "grad_norm": 7.217519760131836, "learning_rate": 1.6521739130434782e-06, "loss": 0.7646, "step": 684 }, { "epoch": 0.1, "grad_norm": 7.455087661743164, "learning_rate": 1.6545893719806762e-06, "loss": 0.7869, "step": 685 }, { "epoch": 0.1, "grad_norm": 7.065547943115234, "learning_rate": 1.6570048309178743e-06, "loss": 0.7334, "step": 686 }, { "epoch": 0.1, "grad_norm": 8.426155090332031, "learning_rate": 1.6594202898550725e-06, "loss": 0.8131, "step": 687 }, { "epoch": 0.1, "grad_norm": 7.348057746887207, "learning_rate": 1.6618357487922706e-06, "loss": 0.7301, "step": 688 }, { "epoch": 0.1, "grad_norm": 7.532543659210205, "learning_rate": 1.6642512077294686e-06, "loss": 0.8029, "step": 689 }, { "epoch": 0.1, "grad_norm": 7.286167144775391, "learning_rate": 1.6666666666666667e-06, "loss": 0.7617, "step": 690 }, { "epoch": 0.1, "grad_norm": 7.580615997314453, "learning_rate": 1.6690821256038647e-06, "loss": 0.7221, "step": 691 }, { "epoch": 0.1, "grad_norm": 8.196526527404785, "learning_rate": 1.6714975845410628e-06, "loss": 0.7528, "step": 692 }, { "epoch": 0.1, "grad_norm": 7.7211809158325195, "learning_rate": 1.6739130434782608e-06, "loss": 0.7651, "step": 693 }, { "epoch": 0.1, "grad_norm": 7.386276721954346, "learning_rate": 1.6763285024154589e-06, "loss": 0.8019, "step": 694 }, { "epoch": 0.1, "grad_norm": 6.790566921234131, "learning_rate": 1.678743961352657e-06, "loss": 0.7869, "step": 695 }, { "epoch": 0.1, "grad_norm": 7.01471471786499, "learning_rate": 1.681159420289855e-06, "loss": 0.7776, "step": 696 }, { "epoch": 0.1, "grad_norm": 6.777046203613281, "learning_rate": 1.683574879227053e-06, "loss": 0.7226, "step": 697 }, { "epoch": 0.1, "grad_norm": 7.075949668884277, "learning_rate": 1.685990338164251e-06, "loss": 0.8331, "step": 698 }, { "epoch": 0.1, "grad_norm": 7.810980796813965, "learning_rate": 1.688405797101449e-06, "loss": 0.7062, "step": 699 }, { "epoch": 0.1, "grad_norm": 7.129891395568848, "learning_rate": 1.6908212560386471e-06, "loss": 0.6971, "step": 700 }, { "epoch": 0.1, "grad_norm": 7.65781831741333, "learning_rate": 1.6932367149758454e-06, "loss": 0.8462, "step": 701 }, { "epoch": 0.1, "grad_norm": 7.034817218780518, "learning_rate": 1.6956521739130435e-06, "loss": 0.6658, "step": 702 }, { "epoch": 0.1, "grad_norm": 7.926301002502441, "learning_rate": 1.6980676328502415e-06, "loss": 0.793, "step": 703 }, { "epoch": 0.1, "grad_norm": 7.450880527496338, "learning_rate": 1.7004830917874396e-06, "loss": 0.7716, "step": 704 }, { "epoch": 0.1, "grad_norm": 7.059274196624756, "learning_rate": 1.7028985507246376e-06, "loss": 0.796, "step": 705 }, { "epoch": 0.1, "grad_norm": 7.187228679656982, "learning_rate": 1.7053140096618357e-06, "loss": 0.7115, "step": 706 }, { "epoch": 0.1, "grad_norm": 7.849295616149902, "learning_rate": 1.7077294685990337e-06, "loss": 0.7523, "step": 707 }, { "epoch": 0.1, "grad_norm": 7.64375114440918, "learning_rate": 1.7101449275362317e-06, "loss": 0.7958, "step": 708 }, { "epoch": 0.1, "grad_norm": 7.568752765655518, "learning_rate": 1.71256038647343e-06, "loss": 0.8439, "step": 709 }, { "epoch": 0.1, "grad_norm": 7.425972938537598, "learning_rate": 1.714975845410628e-06, "loss": 0.7572, "step": 710 }, { "epoch": 0.1, "grad_norm": 7.045616149902344, "learning_rate": 1.7173913043478261e-06, "loss": 0.6801, "step": 711 }, { "epoch": 0.1, "grad_norm": 7.263082027435303, "learning_rate": 1.7198067632850242e-06, "loss": 0.7946, "step": 712 }, { "epoch": 0.1, "grad_norm": 7.6694416999816895, "learning_rate": 1.7222222222222222e-06, "loss": 0.7888, "step": 713 }, { "epoch": 0.1, "grad_norm": 7.025538921356201, "learning_rate": 1.7246376811594203e-06, "loss": 0.824, "step": 714 }, { "epoch": 0.1, "grad_norm": 6.832077503204346, "learning_rate": 1.727053140096618e-06, "loss": 0.7787, "step": 715 }, { "epoch": 0.1, "grad_norm": 7.917798042297363, "learning_rate": 1.7294685990338163e-06, "loss": 0.8646, "step": 716 }, { "epoch": 0.1, "grad_norm": 6.958597660064697, "learning_rate": 1.7318840579710144e-06, "loss": 0.732, "step": 717 }, { "epoch": 0.1, "grad_norm": 6.940836429595947, "learning_rate": 1.7342995169082124e-06, "loss": 0.6975, "step": 718 }, { "epoch": 0.1, "grad_norm": 6.801760673522949, "learning_rate": 1.7367149758454105e-06, "loss": 0.719, "step": 719 }, { "epoch": 0.1, "grad_norm": 7.3323493003845215, "learning_rate": 1.7391304347826085e-06, "loss": 0.7529, "step": 720 }, { "epoch": 0.1, "grad_norm": 7.0480732917785645, "learning_rate": 1.7415458937198066e-06, "loss": 0.7786, "step": 721 }, { "epoch": 0.1, "grad_norm": 6.959286689758301, "learning_rate": 1.7439613526570046e-06, "loss": 0.7522, "step": 722 }, { "epoch": 0.1, "grad_norm": 7.168455123901367, "learning_rate": 1.746376811594203e-06, "loss": 0.7777, "step": 723 }, { "epoch": 0.11, "grad_norm": 7.449641704559326, "learning_rate": 1.748792270531401e-06, "loss": 0.8021, "step": 724 }, { "epoch": 0.11, "grad_norm": 6.69654655456543, "learning_rate": 1.751207729468599e-06, "loss": 0.7978, "step": 725 }, { "epoch": 0.11, "grad_norm": 7.402050971984863, "learning_rate": 1.753623188405797e-06, "loss": 0.8189, "step": 726 }, { "epoch": 0.11, "grad_norm": 7.04074239730835, "learning_rate": 1.756038647342995e-06, "loss": 0.8382, "step": 727 }, { "epoch": 0.11, "grad_norm": 8.427299499511719, "learning_rate": 1.7584541062801931e-06, "loss": 0.8905, "step": 728 }, { "epoch": 0.11, "grad_norm": 8.126757621765137, "learning_rate": 1.7608695652173912e-06, "loss": 0.8346, "step": 729 }, { "epoch": 0.11, "grad_norm": 7.9005327224731445, "learning_rate": 1.7632850241545895e-06, "loss": 0.9024, "step": 730 }, { "epoch": 0.11, "grad_norm": 7.27114725112915, "learning_rate": 1.7657004830917875e-06, "loss": 0.8015, "step": 731 }, { "epoch": 0.11, "grad_norm": 7.833521366119385, "learning_rate": 1.7681159420289855e-06, "loss": 0.8261, "step": 732 }, { "epoch": 0.11, "grad_norm": 7.757767677307129, "learning_rate": 1.7705314009661836e-06, "loss": 0.8104, "step": 733 }, { "epoch": 0.11, "grad_norm": 7.051495552062988, "learning_rate": 1.7729468599033816e-06, "loss": 0.7748, "step": 734 }, { "epoch": 0.11, "grad_norm": 6.411279201507568, "learning_rate": 1.7753623188405795e-06, "loss": 0.717, "step": 735 }, { "epoch": 0.11, "grad_norm": 8.156513214111328, "learning_rate": 1.7777777777777775e-06, "loss": 0.8386, "step": 736 }, { "epoch": 0.11, "grad_norm": 7.043650150299072, "learning_rate": 1.7801932367149756e-06, "loss": 0.7782, "step": 737 }, { "epoch": 0.11, "grad_norm": 6.800252914428711, "learning_rate": 1.7826086956521738e-06, "loss": 0.733, "step": 738 }, { "epoch": 0.11, "grad_norm": 7.420692443847656, "learning_rate": 1.7850241545893719e-06, "loss": 0.7859, "step": 739 }, { "epoch": 0.11, "grad_norm": 6.796139717102051, "learning_rate": 1.78743961352657e-06, "loss": 0.6599, "step": 740 }, { "epoch": 0.11, "grad_norm": 8.852717399597168, "learning_rate": 1.789855072463768e-06, "loss": 0.7532, "step": 741 }, { "epoch": 0.11, "grad_norm": 7.47841739654541, "learning_rate": 1.792270531400966e-06, "loss": 0.8448, "step": 742 }, { "epoch": 0.11, "grad_norm": 7.287054538726807, "learning_rate": 1.794685990338164e-06, "loss": 0.7802, "step": 743 }, { "epoch": 0.11, "grad_norm": 7.5745415687561035, "learning_rate": 1.7971014492753621e-06, "loss": 0.7313, "step": 744 }, { "epoch": 0.11, "grad_norm": 6.276320934295654, "learning_rate": 1.7995169082125604e-06, "loss": 0.699, "step": 745 }, { "epoch": 0.11, "grad_norm": 8.191452980041504, "learning_rate": 1.8019323671497584e-06, "loss": 0.8638, "step": 746 }, { "epoch": 0.11, "grad_norm": 7.008790493011475, "learning_rate": 1.8043478260869565e-06, "loss": 0.778, "step": 747 }, { "epoch": 0.11, "grad_norm": 7.422776222229004, "learning_rate": 1.8067632850241545e-06, "loss": 0.7811, "step": 748 }, { "epoch": 0.11, "grad_norm": 7.647509574890137, "learning_rate": 1.8091787439613526e-06, "loss": 0.7869, "step": 749 }, { "epoch": 0.11, "grad_norm": 7.4003729820251465, "learning_rate": 1.8115942028985506e-06, "loss": 0.7357, "step": 750 }, { "epoch": 0.11, "grad_norm": 7.142919063568115, "learning_rate": 1.8140096618357487e-06, "loss": 0.8403, "step": 751 }, { "epoch": 0.11, "grad_norm": 7.10630464553833, "learning_rate": 1.816425120772947e-06, "loss": 0.793, "step": 752 }, { "epoch": 0.11, "grad_norm": 7.411191463470459, "learning_rate": 1.818840579710145e-06, "loss": 0.7367, "step": 753 }, { "epoch": 0.11, "grad_norm": 7.4041643142700195, "learning_rate": 1.821256038647343e-06, "loss": 0.8108, "step": 754 }, { "epoch": 0.11, "grad_norm": 7.136727809906006, "learning_rate": 1.823671497584541e-06, "loss": 0.72, "step": 755 }, { "epoch": 0.11, "grad_norm": 6.708113670349121, "learning_rate": 1.826086956521739e-06, "loss": 0.7825, "step": 756 }, { "epoch": 0.11, "grad_norm": 7.276269435882568, "learning_rate": 1.828502415458937e-06, "loss": 0.9067, "step": 757 }, { "epoch": 0.11, "grad_norm": 7.114874839782715, "learning_rate": 1.830917874396135e-06, "loss": 0.7735, "step": 758 }, { "epoch": 0.11, "grad_norm": 6.549976825714111, "learning_rate": 1.833333333333333e-06, "loss": 0.7987, "step": 759 }, { "epoch": 0.11, "grad_norm": 7.87067985534668, "learning_rate": 1.8357487922705313e-06, "loss": 0.8281, "step": 760 }, { "epoch": 0.11, "grad_norm": 7.653433322906494, "learning_rate": 1.8381642512077294e-06, "loss": 0.8404, "step": 761 }, { "epoch": 0.11, "grad_norm": 7.8490519523620605, "learning_rate": 1.8405797101449274e-06, "loss": 0.8456, "step": 762 }, { "epoch": 0.11, "grad_norm": 7.265737533569336, "learning_rate": 1.8429951690821255e-06, "loss": 0.6729, "step": 763 }, { "epoch": 0.11, "grad_norm": 7.195828914642334, "learning_rate": 1.8454106280193235e-06, "loss": 0.8477, "step": 764 }, { "epoch": 0.11, "grad_norm": 6.724880695343018, "learning_rate": 1.8478260869565216e-06, "loss": 0.703, "step": 765 }, { "epoch": 0.11, "grad_norm": 7.25720739364624, "learning_rate": 1.8502415458937196e-06, "loss": 0.7367, "step": 766 }, { "epoch": 0.11, "grad_norm": 7.152642250061035, "learning_rate": 1.8526570048309179e-06, "loss": 0.8697, "step": 767 }, { "epoch": 0.11, "grad_norm": 7.6313395500183105, "learning_rate": 1.855072463768116e-06, "loss": 0.7869, "step": 768 }, { "epoch": 0.11, "grad_norm": 7.244140148162842, "learning_rate": 1.857487922705314e-06, "loss": 0.8674, "step": 769 }, { "epoch": 0.11, "grad_norm": 7.339810371398926, "learning_rate": 1.859903381642512e-06, "loss": 0.7495, "step": 770 }, { "epoch": 0.11, "grad_norm": 6.5279130935668945, "learning_rate": 1.86231884057971e-06, "loss": 0.7489, "step": 771 }, { "epoch": 0.11, "grad_norm": 7.72755241394043, "learning_rate": 1.8647342995169081e-06, "loss": 0.8136, "step": 772 }, { "epoch": 0.11, "grad_norm": 6.951262474060059, "learning_rate": 1.8671497584541062e-06, "loss": 0.7152, "step": 773 }, { "epoch": 0.11, "grad_norm": 7.259551048278809, "learning_rate": 1.8695652173913044e-06, "loss": 0.7712, "step": 774 }, { "epoch": 0.11, "grad_norm": 6.9493632316589355, "learning_rate": 1.8719806763285025e-06, "loss": 0.814, "step": 775 }, { "epoch": 0.11, "grad_norm": 7.776601791381836, "learning_rate": 1.8743961352657005e-06, "loss": 0.7884, "step": 776 }, { "epoch": 0.11, "grad_norm": 7.044103145599365, "learning_rate": 1.8768115942028984e-06, "loss": 0.7747, "step": 777 }, { "epoch": 0.11, "grad_norm": 7.50429630279541, "learning_rate": 1.8792270531400964e-06, "loss": 0.7214, "step": 778 }, { "epoch": 0.11, "grad_norm": 7.172595024108887, "learning_rate": 1.8816425120772945e-06, "loss": 0.8642, "step": 779 }, { "epoch": 0.11, "grad_norm": 6.736869812011719, "learning_rate": 1.8840579710144925e-06, "loss": 0.7735, "step": 780 }, { "epoch": 0.11, "grad_norm": 7.850686073303223, "learning_rate": 1.8864734299516908e-06, "loss": 0.742, "step": 781 }, { "epoch": 0.11, "grad_norm": 7.1776442527771, "learning_rate": 1.8888888888888888e-06, "loss": 0.8639, "step": 782 }, { "epoch": 0.11, "grad_norm": 8.007955551147461, "learning_rate": 1.8913043478260869e-06, "loss": 0.809, "step": 783 }, { "epoch": 0.11, "grad_norm": 7.0805344581604, "learning_rate": 1.893719806763285e-06, "loss": 0.7647, "step": 784 }, { "epoch": 0.11, "grad_norm": 7.013727188110352, "learning_rate": 1.896135265700483e-06, "loss": 0.7454, "step": 785 }, { "epoch": 0.11, "grad_norm": 7.133039474487305, "learning_rate": 1.898550724637681e-06, "loss": 0.7325, "step": 786 }, { "epoch": 0.11, "grad_norm": 6.895448684692383, "learning_rate": 1.900966183574879e-06, "loss": 0.7612, "step": 787 }, { "epoch": 0.11, "grad_norm": 6.943808078765869, "learning_rate": 1.9033816425120771e-06, "loss": 0.8325, "step": 788 }, { "epoch": 0.11, "grad_norm": 6.992349624633789, "learning_rate": 1.9057971014492754e-06, "loss": 0.8334, "step": 789 }, { "epoch": 0.11, "grad_norm": 6.970850467681885, "learning_rate": 1.9082125603864732e-06, "loss": 0.7927, "step": 790 }, { "epoch": 0.11, "grad_norm": 7.288712501525879, "learning_rate": 1.9106280193236713e-06, "loss": 0.7924, "step": 791 }, { "epoch": 0.11, "grad_norm": 7.4876861572265625, "learning_rate": 1.9130434782608697e-06, "loss": 0.7946, "step": 792 }, { "epoch": 0.12, "grad_norm": 7.227801322937012, "learning_rate": 1.9154589371980678e-06, "loss": 0.7902, "step": 793 }, { "epoch": 0.12, "grad_norm": 7.847288131713867, "learning_rate": 1.917874396135266e-06, "loss": 0.8201, "step": 794 }, { "epoch": 0.12, "grad_norm": 7.481346607208252, "learning_rate": 1.920289855072464e-06, "loss": 0.7615, "step": 795 }, { "epoch": 0.12, "grad_norm": 6.781558036804199, "learning_rate": 1.922705314009662e-06, "loss": 0.7305, "step": 796 }, { "epoch": 0.12, "grad_norm": 7.188095569610596, "learning_rate": 1.9251207729468596e-06, "loss": 0.751, "step": 797 }, { "epoch": 0.12, "grad_norm": 7.03822660446167, "learning_rate": 1.9275362318840576e-06, "loss": 0.8279, "step": 798 }, { "epoch": 0.12, "grad_norm": 7.557547092437744, "learning_rate": 1.9299516908212557e-06, "loss": 0.8434, "step": 799 }, { "epoch": 0.12, "grad_norm": 7.124966144561768, "learning_rate": 1.932367149758454e-06, "loss": 0.8203, "step": 800 }, { "epoch": 0.12, "grad_norm": 7.467984199523926, "learning_rate": 1.934782608695652e-06, "loss": 0.7954, "step": 801 }, { "epoch": 0.12, "grad_norm": 7.094202041625977, "learning_rate": 1.9371980676328502e-06, "loss": 0.7683, "step": 802 }, { "epoch": 0.12, "grad_norm": 7.222095966339111, "learning_rate": 1.9396135265700483e-06, "loss": 0.7736, "step": 803 }, { "epoch": 0.12, "grad_norm": 8.856788635253906, "learning_rate": 1.9420289855072463e-06, "loss": 0.8849, "step": 804 }, { "epoch": 0.12, "grad_norm": 7.304723262786865, "learning_rate": 1.9444444444444444e-06, "loss": 0.7888, "step": 805 }, { "epoch": 0.12, "grad_norm": 7.098222255706787, "learning_rate": 1.9468599033816424e-06, "loss": 0.7901, "step": 806 }, { "epoch": 0.12, "grad_norm": 7.277933597564697, "learning_rate": 1.9492753623188405e-06, "loss": 0.7947, "step": 807 }, { "epoch": 0.12, "grad_norm": 6.94703483581543, "learning_rate": 1.9516908212560385e-06, "loss": 0.7425, "step": 808 }, { "epoch": 0.12, "grad_norm": 7.556836128234863, "learning_rate": 1.9541062801932366e-06, "loss": 0.7915, "step": 809 }, { "epoch": 0.12, "grad_norm": 7.213542938232422, "learning_rate": 1.9565217391304346e-06, "loss": 0.8279, "step": 810 }, { "epoch": 0.12, "grad_norm": 6.537713050842285, "learning_rate": 1.9589371980676327e-06, "loss": 0.7581, "step": 811 }, { "epoch": 0.12, "grad_norm": 7.898279190063477, "learning_rate": 1.9613526570048307e-06, "loss": 0.7685, "step": 812 }, { "epoch": 0.12, "grad_norm": 7.585102081298828, "learning_rate": 1.9637681159420288e-06, "loss": 0.7884, "step": 813 }, { "epoch": 0.12, "grad_norm": 7.213348865509033, "learning_rate": 1.9661835748792272e-06, "loss": 0.8536, "step": 814 }, { "epoch": 0.12, "grad_norm": 7.188924312591553, "learning_rate": 1.9685990338164253e-06, "loss": 0.8041, "step": 815 }, { "epoch": 0.12, "grad_norm": 6.970036506652832, "learning_rate": 1.9710144927536233e-06, "loss": 0.6456, "step": 816 }, { "epoch": 0.12, "grad_norm": 7.36569356918335, "learning_rate": 1.9734299516908214e-06, "loss": 0.7915, "step": 817 }, { "epoch": 0.12, "grad_norm": 7.583734512329102, "learning_rate": 1.975845410628019e-06, "loss": 0.8996, "step": 818 }, { "epoch": 0.12, "grad_norm": 6.750718116760254, "learning_rate": 1.978260869565217e-06, "loss": 0.7942, "step": 819 }, { "epoch": 0.12, "grad_norm": 7.1419782638549805, "learning_rate": 1.980676328502415e-06, "loss": 0.7231, "step": 820 }, { "epoch": 0.12, "grad_norm": 7.317028045654297, "learning_rate": 1.9830917874396136e-06, "loss": 0.8238, "step": 821 }, { "epoch": 0.12, "grad_norm": 7.853755950927734, "learning_rate": 1.9855072463768116e-06, "loss": 0.8611, "step": 822 }, { "epoch": 0.12, "grad_norm": 6.918148994445801, "learning_rate": 1.9879227053140097e-06, "loss": 0.7294, "step": 823 }, { "epoch": 0.12, "grad_norm": 7.317668437957764, "learning_rate": 1.9903381642512077e-06, "loss": 0.7833, "step": 824 }, { "epoch": 0.12, "grad_norm": 6.835295677185059, "learning_rate": 1.9927536231884058e-06, "loss": 0.7731, "step": 825 }, { "epoch": 0.12, "grad_norm": 7.018136978149414, "learning_rate": 1.995169082125604e-06, "loss": 0.7605, "step": 826 }, { "epoch": 0.12, "grad_norm": 7.833322525024414, "learning_rate": 1.997584541062802e-06, "loss": 0.7098, "step": 827 }, { "epoch": 0.12, "grad_norm": 8.46504020690918, "learning_rate": 2e-06, "loss": 0.7909, "step": 828 }, { "epoch": 0.12, "grad_norm": 6.988455772399902, "learning_rate": 1.9999999874733122e-06, "loss": 0.8048, "step": 829 }, { "epoch": 0.12, "grad_norm": 7.32778787612915, "learning_rate": 1.9999999498932496e-06, "loss": 0.8175, "step": 830 }, { "epoch": 0.12, "grad_norm": 6.664885997772217, "learning_rate": 1.9999998872598134e-06, "loss": 0.7892, "step": 831 }, { "epoch": 0.12, "grad_norm": 6.987238883972168, "learning_rate": 1.999999799573005e-06, "loss": 0.7684, "step": 832 }, { "epoch": 0.12, "grad_norm": 7.0742902755737305, "learning_rate": 1.999999686832826e-06, "loss": 0.7141, "step": 833 }, { "epoch": 0.12, "grad_norm": 7.050313472747803, "learning_rate": 1.9999995490392797e-06, "loss": 0.7471, "step": 834 }, { "epoch": 0.12, "grad_norm": 7.761480808258057, "learning_rate": 1.9999993861923697e-06, "loss": 0.7734, "step": 835 }, { "epoch": 0.12, "grad_norm": 7.212278366088867, "learning_rate": 1.9999991982921e-06, "loss": 0.7135, "step": 836 }, { "epoch": 0.12, "grad_norm": 7.075423240661621, "learning_rate": 1.9999989853384747e-06, "loss": 0.8307, "step": 837 }, { "epoch": 0.12, "grad_norm": 6.975811004638672, "learning_rate": 1.9999987473315004e-06, "loss": 0.7655, "step": 838 }, { "epoch": 0.12, "grad_norm": 7.010690212249756, "learning_rate": 1.9999984842711817e-06, "loss": 0.8267, "step": 839 }, { "epoch": 0.12, "grad_norm": 6.818356513977051, "learning_rate": 1.9999981961575262e-06, "loss": 0.7506, "step": 840 }, { "epoch": 0.12, "grad_norm": 6.930919647216797, "learning_rate": 1.9999978829905407e-06, "loss": 0.7289, "step": 841 }, { "epoch": 0.12, "grad_norm": 6.42098331451416, "learning_rate": 1.9999975447702328e-06, "loss": 0.7699, "step": 842 }, { "epoch": 0.12, "grad_norm": 7.111569404602051, "learning_rate": 1.9999971814966113e-06, "loss": 0.7586, "step": 843 }, { "epoch": 0.12, "grad_norm": 7.3145928382873535, "learning_rate": 1.9999967931696853e-06, "loss": 0.8861, "step": 844 }, { "epoch": 0.12, "grad_norm": 7.702468395233154, "learning_rate": 1.9999963797894644e-06, "loss": 0.7982, "step": 845 }, { "epoch": 0.12, "grad_norm": 7.33725643157959, "learning_rate": 1.999995941355959e-06, "loss": 0.7832, "step": 846 }, { "epoch": 0.12, "grad_norm": 7.579097747802734, "learning_rate": 1.9999954778691802e-06, "loss": 0.7907, "step": 847 }, { "epoch": 0.12, "grad_norm": 8.02838134765625, "learning_rate": 1.99999498932914e-06, "loss": 0.7244, "step": 848 }, { "epoch": 0.12, "grad_norm": 7.9815144538879395, "learning_rate": 1.999994475735849e-06, "loss": 0.7591, "step": 849 }, { "epoch": 0.12, "grad_norm": 7.011471748352051, "learning_rate": 1.9999939370893224e-06, "loss": 0.8142, "step": 850 }, { "epoch": 0.12, "grad_norm": 7.576908588409424, "learning_rate": 1.999993373389572e-06, "loss": 0.8336, "step": 851 }, { "epoch": 0.12, "grad_norm": 7.457164764404297, "learning_rate": 1.9999927846366126e-06, "loss": 0.7799, "step": 852 }, { "epoch": 0.12, "grad_norm": 7.270396709442139, "learning_rate": 1.9999921708304587e-06, "loss": 0.6832, "step": 853 }, { "epoch": 0.12, "grad_norm": 7.563149452209473, "learning_rate": 1.9999915319711256e-06, "loss": 0.8008, "step": 854 }, { "epoch": 0.12, "grad_norm": 8.246105194091797, "learning_rate": 1.9999908680586297e-06, "loss": 0.8213, "step": 855 }, { "epoch": 0.12, "grad_norm": 8.040163040161133, "learning_rate": 1.9999901790929872e-06, "loss": 0.7876, "step": 856 }, { "epoch": 0.12, "grad_norm": 7.12648344039917, "learning_rate": 1.9999894650742163e-06, "loss": 0.7937, "step": 857 }, { "epoch": 0.12, "grad_norm": 6.953192710876465, "learning_rate": 1.9999887260023335e-06, "loss": 0.7016, "step": 858 }, { "epoch": 0.12, "grad_norm": 7.5381879806518555, "learning_rate": 1.999987961877358e-06, "loss": 0.7616, "step": 859 }, { "epoch": 0.12, "grad_norm": 7.287964344024658, "learning_rate": 1.999987172699309e-06, "loss": 0.7734, "step": 860 }, { "epoch": 0.12, "grad_norm": 7.077799320220947, "learning_rate": 1.9999863584682065e-06, "loss": 0.7612, "step": 861 }, { "epoch": 0.13, "grad_norm": 7.6215338706970215, "learning_rate": 1.99998551918407e-06, "loss": 0.8489, "step": 862 }, { "epoch": 0.13, "grad_norm": 7.478134632110596, "learning_rate": 1.9999846548469215e-06, "loss": 0.77, "step": 863 }, { "epoch": 0.13, "grad_norm": 7.688816547393799, "learning_rate": 1.999983765456782e-06, "loss": 0.8449, "step": 864 }, { "epoch": 0.13, "grad_norm": 8.364445686340332, "learning_rate": 1.9999828510136746e-06, "loss": 0.7663, "step": 865 }, { "epoch": 0.13, "grad_norm": 6.752935886383057, "learning_rate": 1.9999819115176215e-06, "loss": 0.7205, "step": 866 }, { "epoch": 0.13, "grad_norm": 6.915037155151367, "learning_rate": 1.9999809469686457e-06, "loss": 0.7212, "step": 867 }, { "epoch": 0.13, "grad_norm": 6.337381839752197, "learning_rate": 1.9999799573667733e-06, "loss": 0.7876, "step": 868 }, { "epoch": 0.13, "grad_norm": 6.959118366241455, "learning_rate": 1.999978942712027e-06, "loss": 0.8272, "step": 869 }, { "epoch": 0.13, "grad_norm": 6.419911861419678, "learning_rate": 1.9999779030044333e-06, "loss": 0.7521, "step": 870 }, { "epoch": 0.13, "grad_norm": 6.855642795562744, "learning_rate": 1.999976838244018e-06, "loss": 0.8198, "step": 871 }, { "epoch": 0.13, "grad_norm": 7.05889368057251, "learning_rate": 1.9999757484308073e-06, "loss": 0.7671, "step": 872 }, { "epoch": 0.13, "grad_norm": 7.261213302612305, "learning_rate": 1.9999746335648296e-06, "loss": 0.7701, "step": 873 }, { "epoch": 0.13, "grad_norm": 6.988398551940918, "learning_rate": 1.9999734936461127e-06, "loss": 0.739, "step": 874 }, { "epoch": 0.13, "grad_norm": 8.28572940826416, "learning_rate": 1.999972328674684e-06, "loss": 0.81, "step": 875 }, { "epoch": 0.13, "grad_norm": 6.649571895599365, "learning_rate": 1.9999711386505734e-06, "loss": 0.6997, "step": 876 }, { "epoch": 0.13, "grad_norm": 6.467655181884766, "learning_rate": 1.9999699235738112e-06, "loss": 0.6899, "step": 877 }, { "epoch": 0.13, "grad_norm": 7.268915176391602, "learning_rate": 1.9999686834444267e-06, "loss": 0.7411, "step": 878 }, { "epoch": 0.13, "grad_norm": 7.1048150062561035, "learning_rate": 1.999967418262452e-06, "loss": 0.734, "step": 879 }, { "epoch": 0.13, "grad_norm": 7.265988349914551, "learning_rate": 1.9999661280279185e-06, "loss": 0.7155, "step": 880 }, { "epoch": 0.13, "grad_norm": 7.070664405822754, "learning_rate": 1.999964812740858e-06, "loss": 0.7286, "step": 881 }, { "epoch": 0.13, "grad_norm": 7.251183986663818, "learning_rate": 1.9999634724013043e-06, "loss": 0.834, "step": 882 }, { "epoch": 0.13, "grad_norm": 6.926445484161377, "learning_rate": 1.9999621070092905e-06, "loss": 0.7217, "step": 883 }, { "epoch": 0.13, "grad_norm": 7.33603572845459, "learning_rate": 1.999960716564851e-06, "loss": 0.8188, "step": 884 }, { "epoch": 0.13, "grad_norm": 7.551576614379883, "learning_rate": 1.99995930106802e-06, "loss": 0.7075, "step": 885 }, { "epoch": 0.13, "grad_norm": 7.9671525955200195, "learning_rate": 1.999957860518834e-06, "loss": 0.8419, "step": 886 }, { "epoch": 0.13, "grad_norm": 7.965726852416992, "learning_rate": 1.999956394917328e-06, "loss": 0.7534, "step": 887 }, { "epoch": 0.13, "grad_norm": 7.542666435241699, "learning_rate": 1.9999549042635397e-06, "loss": 0.8041, "step": 888 }, { "epoch": 0.13, "grad_norm": 6.965225696563721, "learning_rate": 1.9999533885575063e-06, "loss": 0.7423, "step": 889 }, { "epoch": 0.13, "grad_norm": 8.453461647033691, "learning_rate": 1.999951847799265e-06, "loss": 0.9672, "step": 890 }, { "epoch": 0.13, "grad_norm": 6.975613117218018, "learning_rate": 1.999950281988855e-06, "loss": 0.7798, "step": 891 }, { "epoch": 0.13, "grad_norm": 7.121090412139893, "learning_rate": 1.9999486911263153e-06, "loss": 0.8042, "step": 892 }, { "epoch": 0.13, "grad_norm": 6.733061790466309, "learning_rate": 1.9999470752116865e-06, "loss": 0.8599, "step": 893 }, { "epoch": 0.13, "grad_norm": 6.838613986968994, "learning_rate": 1.999945434245008e-06, "loss": 0.8099, "step": 894 }, { "epoch": 0.13, "grad_norm": 6.6267924308776855, "learning_rate": 1.9999437682263218e-06, "loss": 0.8335, "step": 895 }, { "epoch": 0.13, "grad_norm": 7.769452095031738, "learning_rate": 1.9999420771556687e-06, "loss": 0.7599, "step": 896 }, { "epoch": 0.13, "grad_norm": 7.591211795806885, "learning_rate": 1.9999403610330918e-06, "loss": 0.781, "step": 897 }, { "epoch": 0.13, "grad_norm": 6.332066059112549, "learning_rate": 1.999938619858634e-06, "loss": 0.7887, "step": 898 }, { "epoch": 0.13, "grad_norm": 7.4344916343688965, "learning_rate": 1.9999368536323383e-06, "loss": 0.6751, "step": 899 }, { "epoch": 0.13, "grad_norm": 6.353713035583496, "learning_rate": 1.99993506235425e-06, "loss": 0.7695, "step": 900 }, { "epoch": 0.13, "grad_norm": 7.36722469329834, "learning_rate": 1.999933246024413e-06, "loss": 0.7986, "step": 901 }, { "epoch": 0.13, "grad_norm": 6.344376087188721, "learning_rate": 1.9999314046428735e-06, "loss": 0.7089, "step": 902 }, { "epoch": 0.13, "grad_norm": 6.844686985015869, "learning_rate": 1.9999295382096774e-06, "loss": 0.8, "step": 903 }, { "epoch": 0.13, "grad_norm": 6.458288669586182, "learning_rate": 1.999927646724872e-06, "loss": 0.7923, "step": 904 }, { "epoch": 0.13, "grad_norm": 6.475215435028076, "learning_rate": 1.999925730188503e-06, "loss": 0.8048, "step": 905 }, { "epoch": 0.13, "grad_norm": 7.330568790435791, "learning_rate": 1.99992378860062e-06, "loss": 0.8117, "step": 906 }, { "epoch": 0.13, "grad_norm": 6.795989990234375, "learning_rate": 1.9999218219612714e-06, "loss": 0.749, "step": 907 }, { "epoch": 0.13, "grad_norm": 7.064602851867676, "learning_rate": 1.999919830270506e-06, "loss": 0.7418, "step": 908 }, { "epoch": 0.13, "grad_norm": 6.717015743255615, "learning_rate": 1.9999178135283743e-06, "loss": 0.8121, "step": 909 }, { "epoch": 0.13, "grad_norm": 6.737549781799316, "learning_rate": 1.9999157717349263e-06, "loss": 0.7892, "step": 910 }, { "epoch": 0.13, "grad_norm": 7.395997524261475, "learning_rate": 1.9999137048902133e-06, "loss": 0.7849, "step": 911 }, { "epoch": 0.13, "grad_norm": 7.49749755859375, "learning_rate": 1.9999116129942874e-06, "loss": 0.734, "step": 912 }, { "epoch": 0.13, "grad_norm": 6.724456787109375, "learning_rate": 1.9999094960472e-06, "loss": 0.7179, "step": 913 }, { "epoch": 0.13, "grad_norm": 7.463527202606201, "learning_rate": 1.9999073540490054e-06, "loss": 0.8342, "step": 914 }, { "epoch": 0.13, "grad_norm": 7.937229156494141, "learning_rate": 1.9999051869997565e-06, "loss": 0.7919, "step": 915 }, { "epoch": 0.13, "grad_norm": 6.344451427459717, "learning_rate": 1.999902994899508e-06, "loss": 0.6945, "step": 916 }, { "epoch": 0.13, "grad_norm": 7.5037431716918945, "learning_rate": 1.9999007777483145e-06, "loss": 0.8103, "step": 917 }, { "epoch": 0.13, "grad_norm": 6.522441864013672, "learning_rate": 1.9998985355462316e-06, "loss": 0.7021, "step": 918 }, { "epoch": 0.13, "grad_norm": 6.697798252105713, "learning_rate": 1.9998962682933158e-06, "loss": 0.8032, "step": 919 }, { "epoch": 0.13, "grad_norm": 7.971286296844482, "learning_rate": 1.9998939759896237e-06, "loss": 0.8505, "step": 920 }, { "epoch": 0.13, "grad_norm": 7.023707866668701, "learning_rate": 1.999891658635212e-06, "loss": 0.7099, "step": 921 }, { "epoch": 0.13, "grad_norm": 6.979548454284668, "learning_rate": 1.9998893162301404e-06, "loss": 0.7823, "step": 922 }, { "epoch": 0.13, "grad_norm": 7.130308628082275, "learning_rate": 1.999886948774466e-06, "loss": 0.821, "step": 923 }, { "epoch": 0.13, "grad_norm": 7.299970626831055, "learning_rate": 1.999884556268249e-06, "loss": 0.8016, "step": 924 }, { "epoch": 0.13, "grad_norm": 7.397519111633301, "learning_rate": 1.999882138711549e-06, "loss": 0.7128, "step": 925 }, { "epoch": 0.13, "grad_norm": 7.02589225769043, "learning_rate": 1.999879696104427e-06, "loss": 0.8028, "step": 926 }, { "epoch": 0.13, "grad_norm": 7.76304817199707, "learning_rate": 1.9998772284469434e-06, "loss": 0.8197, "step": 927 }, { "epoch": 0.13, "grad_norm": 6.987037181854248, "learning_rate": 1.999874735739161e-06, "loss": 0.83, "step": 928 }, { "epoch": 0.13, "grad_norm": 6.533684730529785, "learning_rate": 1.9998722179811407e-06, "loss": 0.8056, "step": 929 }, { "epoch": 0.13, "grad_norm": 7.126735210418701, "learning_rate": 1.9998696751729475e-06, "loss": 0.7714, "step": 930 }, { "epoch": 0.14, "grad_norm": 6.966559410095215, "learning_rate": 1.999867107314644e-06, "loss": 0.7575, "step": 931 }, { "epoch": 0.14, "grad_norm": 7.9832258224487305, "learning_rate": 1.9998645144062945e-06, "loss": 0.8782, "step": 932 }, { "epoch": 0.14, "grad_norm": 6.882324695587158, "learning_rate": 1.9998618964479643e-06, "loss": 0.6536, "step": 933 }, { "epoch": 0.14, "grad_norm": 7.675004482269287, "learning_rate": 1.9998592534397194e-06, "loss": 0.7598, "step": 934 }, { "epoch": 0.14, "grad_norm": 6.599270820617676, "learning_rate": 1.999856585381625e-06, "loss": 0.8015, "step": 935 }, { "epoch": 0.14, "grad_norm": 7.360358715057373, "learning_rate": 1.9998538922737485e-06, "loss": 0.7541, "step": 936 }, { "epoch": 0.14, "grad_norm": 6.243955135345459, "learning_rate": 1.9998511741161573e-06, "loss": 0.7972, "step": 937 }, { "epoch": 0.14, "grad_norm": 6.647517681121826, "learning_rate": 1.9998484309089193e-06, "loss": 0.7857, "step": 938 }, { "epoch": 0.14, "grad_norm": 6.704213619232178, "learning_rate": 1.9998456626521037e-06, "loss": 0.715, "step": 939 }, { "epoch": 0.14, "grad_norm": 6.871191024780273, "learning_rate": 1.99984286934578e-06, "loss": 0.7668, "step": 940 }, { "epoch": 0.14, "grad_norm": 6.937484264373779, "learning_rate": 1.9998400509900173e-06, "loss": 0.8282, "step": 941 }, { "epoch": 0.14, "grad_norm": 6.498072624206543, "learning_rate": 1.9998372075848867e-06, "loss": 0.6634, "step": 942 }, { "epoch": 0.14, "grad_norm": 7.107848644256592, "learning_rate": 1.9998343391304596e-06, "loss": 0.8083, "step": 943 }, { "epoch": 0.14, "grad_norm": 6.896372318267822, "learning_rate": 1.9998314456268073e-06, "loss": 0.6779, "step": 944 }, { "epoch": 0.14, "grad_norm": 7.100346088409424, "learning_rate": 1.9998285270740033e-06, "loss": 0.832, "step": 945 }, { "epoch": 0.14, "grad_norm": 6.746522903442383, "learning_rate": 1.9998255834721197e-06, "loss": 0.7626, "step": 946 }, { "epoch": 0.14, "grad_norm": 7.2114667892456055, "learning_rate": 1.9998226148212306e-06, "loss": 0.7845, "step": 947 }, { "epoch": 0.14, "grad_norm": 7.2370924949646, "learning_rate": 1.9998196211214106e-06, "loss": 0.7485, "step": 948 }, { "epoch": 0.14, "grad_norm": 8.59536075592041, "learning_rate": 1.9998166023727345e-06, "loss": 0.7297, "step": 949 }, { "epoch": 0.14, "grad_norm": 6.645427227020264, "learning_rate": 1.999813558575278e-06, "loss": 0.8056, "step": 950 }, { "epoch": 0.14, "grad_norm": 7.322451114654541, "learning_rate": 1.999810489729117e-06, "loss": 0.7759, "step": 951 }, { "epoch": 0.14, "grad_norm": 8.118412971496582, "learning_rate": 1.9998073958343293e-06, "loss": 0.7975, "step": 952 }, { "epoch": 0.14, "grad_norm": 6.737993240356445, "learning_rate": 1.9998042768909916e-06, "loss": 0.7966, "step": 953 }, { "epoch": 0.14, "grad_norm": 6.233902454376221, "learning_rate": 1.999801132899182e-06, "loss": 0.7994, "step": 954 }, { "epoch": 0.14, "grad_norm": 7.776464939117432, "learning_rate": 1.9997979638589797e-06, "loss": 0.8332, "step": 955 }, { "epoch": 0.14, "grad_norm": 7.586968421936035, "learning_rate": 1.999794769770464e-06, "loss": 0.7845, "step": 956 }, { "epoch": 0.14, "grad_norm": 6.831209659576416, "learning_rate": 1.9997915506337144e-06, "loss": 0.8374, "step": 957 }, { "epoch": 0.14, "grad_norm": 7.106581211090088, "learning_rate": 1.9997883064488127e-06, "loss": 0.7983, "step": 958 }, { "epoch": 0.14, "grad_norm": 7.369691848754883, "learning_rate": 1.999785037215839e-06, "loss": 0.7817, "step": 959 }, { "epoch": 0.14, "grad_norm": 6.632747173309326, "learning_rate": 1.9997817429348757e-06, "loss": 0.7453, "step": 960 }, { "epoch": 0.14, "grad_norm": 7.510539531707764, "learning_rate": 1.999778423606005e-06, "loss": 0.7834, "step": 961 }, { "epoch": 0.14, "grad_norm": 6.846020698547363, "learning_rate": 1.999775079229311e-06, "loss": 0.8001, "step": 962 }, { "epoch": 0.14, "grad_norm": 6.8550519943237305, "learning_rate": 1.9997717098048766e-06, "loss": 0.7311, "step": 963 }, { "epoch": 0.14, "grad_norm": 6.992377281188965, "learning_rate": 1.9997683153327864e-06, "loss": 0.7442, "step": 964 }, { "epoch": 0.14, "grad_norm": 6.856739044189453, "learning_rate": 1.9997648958131255e-06, "loss": 0.7529, "step": 965 }, { "epoch": 0.14, "grad_norm": 6.630317211151123, "learning_rate": 1.9997614512459796e-06, "loss": 0.7309, "step": 966 }, { "epoch": 0.14, "grad_norm": 5.981285572052002, "learning_rate": 1.999757981631435e-06, "loss": 0.6428, "step": 967 }, { "epoch": 0.14, "grad_norm": 6.904129505157471, "learning_rate": 1.9997544869695785e-06, "loss": 0.8442, "step": 968 }, { "epoch": 0.14, "grad_norm": 6.639620304107666, "learning_rate": 1.999750967260498e-06, "loss": 0.798, "step": 969 }, { "epoch": 0.14, "grad_norm": 7.026580810546875, "learning_rate": 1.9997474225042815e-06, "loss": 0.7643, "step": 970 }, { "epoch": 0.14, "grad_norm": 6.352241516113281, "learning_rate": 1.9997438527010175e-06, "loss": 0.7367, "step": 971 }, { "epoch": 0.14, "grad_norm": 7.001410007476807, "learning_rate": 1.999740257850796e-06, "loss": 0.7688, "step": 972 }, { "epoch": 0.14, "grad_norm": 6.36932897567749, "learning_rate": 1.999736637953706e-06, "loss": 0.7961, "step": 973 }, { "epoch": 0.14, "grad_norm": 7.407019138336182, "learning_rate": 1.99973299300984e-06, "loss": 0.7554, "step": 974 }, { "epoch": 0.14, "grad_norm": 7.699155807495117, "learning_rate": 1.999729323019287e-06, "loss": 0.8061, "step": 975 }, { "epoch": 0.14, "grad_norm": 7.191115856170654, "learning_rate": 1.9997256279821415e-06, "loss": 0.7061, "step": 976 }, { "epoch": 0.14, "grad_norm": 8.223348617553711, "learning_rate": 1.999721907898494e-06, "loss": 0.7552, "step": 977 }, { "epoch": 0.14, "grad_norm": 6.463339805603027, "learning_rate": 1.9997181627684386e-06, "loss": 0.7499, "step": 978 }, { "epoch": 0.14, "grad_norm": 7.8149824142456055, "learning_rate": 1.999714392592069e-06, "loss": 0.806, "step": 979 }, { "epoch": 0.14, "grad_norm": 7.4606146812438965, "learning_rate": 1.99971059736948e-06, "loss": 0.8387, "step": 980 }, { "epoch": 0.14, "grad_norm": 7.467471599578857, "learning_rate": 1.9997067771007656e-06, "loss": 0.8289, "step": 981 }, { "epoch": 0.14, "grad_norm": 6.783915042877197, "learning_rate": 1.999702931786023e-06, "loss": 0.744, "step": 982 }, { "epoch": 0.14, "grad_norm": 7.022887706756592, "learning_rate": 1.9996990614253473e-06, "loss": 0.7969, "step": 983 }, { "epoch": 0.14, "grad_norm": 6.964649677276611, "learning_rate": 1.9996951660188365e-06, "loss": 0.7299, "step": 984 }, { "epoch": 0.14, "grad_norm": 7.764372825622559, "learning_rate": 1.9996912455665867e-06, "loss": 0.9575, "step": 985 }, { "epoch": 0.14, "grad_norm": 7.1877665519714355, "learning_rate": 1.999687300068698e-06, "loss": 0.8511, "step": 986 }, { "epoch": 0.14, "grad_norm": 7.4144606590271, "learning_rate": 1.999683329525268e-06, "loss": 0.7898, "step": 987 }, { "epoch": 0.14, "grad_norm": 6.220948219299316, "learning_rate": 1.9996793339363966e-06, "loss": 0.825, "step": 988 }, { "epoch": 0.14, "grad_norm": 7.485213756561279, "learning_rate": 1.9996753133021835e-06, "loss": 0.7365, "step": 989 }, { "epoch": 0.14, "grad_norm": 6.465168476104736, "learning_rate": 1.99967126762273e-06, "loss": 0.7575, "step": 990 }, { "epoch": 0.14, "grad_norm": 7.964964389801025, "learning_rate": 1.999667196898137e-06, "loss": 0.78, "step": 991 }, { "epoch": 0.14, "grad_norm": 7.138269901275635, "learning_rate": 1.9996631011285067e-06, "loss": 0.8096, "step": 992 }, { "epoch": 0.14, "grad_norm": 7.042336940765381, "learning_rate": 1.9996589803139417e-06, "loss": 0.863, "step": 993 }, { "epoch": 0.14, "grad_norm": 6.63039493560791, "learning_rate": 1.9996548344545454e-06, "loss": 0.7615, "step": 994 }, { "epoch": 0.14, "grad_norm": 6.685534954071045, "learning_rate": 1.9996506635504214e-06, "loss": 0.8068, "step": 995 }, { "epoch": 0.14, "grad_norm": 6.520956039428711, "learning_rate": 1.999646467601674e-06, "loss": 0.7534, "step": 996 }, { "epoch": 0.14, "grad_norm": 6.946865558624268, "learning_rate": 1.999642246608409e-06, "loss": 0.8213, "step": 997 }, { "epoch": 0.14, "grad_norm": 7.908633232116699, "learning_rate": 1.9996380005707317e-06, "loss": 0.7167, "step": 998 }, { "epoch": 0.14, "grad_norm": 6.1641740798950195, "learning_rate": 1.9996337294887483e-06, "loss": 0.6993, "step": 999 }, { "epoch": 0.15, "grad_norm": 7.553565502166748, "learning_rate": 1.9996294333625663e-06, "loss": 0.7639, "step": 1000 }, { "epoch": 0.15, "grad_norm": 6.984685897827148, "learning_rate": 1.999625112192293e-06, "loss": 0.8125, "step": 1001 }, { "epoch": 0.15, "grad_norm": 6.177978038787842, "learning_rate": 1.9996207659780365e-06, "loss": 0.7665, "step": 1002 }, { "epoch": 0.15, "grad_norm": 6.2888593673706055, "learning_rate": 1.999616394719906e-06, "loss": 0.7385, "step": 1003 }, { "epoch": 0.15, "grad_norm": 7.543355941772461, "learning_rate": 1.999611998418011e-06, "loss": 0.8092, "step": 1004 }, { "epoch": 0.15, "grad_norm": 7.011266231536865, "learning_rate": 1.9996075770724617e-06, "loss": 0.8049, "step": 1005 }, { "epoch": 0.15, "grad_norm": 6.77280330657959, "learning_rate": 1.999603130683369e-06, "loss": 0.7447, "step": 1006 }, { "epoch": 0.15, "grad_norm": 6.563028335571289, "learning_rate": 1.9995986592508435e-06, "loss": 0.7899, "step": 1007 }, { "epoch": 0.15, "grad_norm": 7.2784552574157715, "learning_rate": 1.999594162774998e-06, "loss": 0.7679, "step": 1008 }, { "epoch": 0.15, "grad_norm": 6.478434085845947, "learning_rate": 1.999589641255945e-06, "loss": 0.748, "step": 1009 }, { "epoch": 0.15, "grad_norm": 7.2083306312561035, "learning_rate": 1.9995850946937977e-06, "loss": 0.8548, "step": 1010 }, { "epoch": 0.15, "grad_norm": 6.559256553649902, "learning_rate": 1.9995805230886696e-06, "loss": 0.7241, "step": 1011 }, { "epoch": 0.15, "grad_norm": 6.489805221557617, "learning_rate": 1.999575926440676e-06, "loss": 0.7799, "step": 1012 }, { "epoch": 0.15, "grad_norm": 6.502325534820557, "learning_rate": 1.999571304749932e-06, "loss": 0.7462, "step": 1013 }, { "epoch": 0.15, "grad_norm": 7.3409528732299805, "learning_rate": 1.9995666580165526e-06, "loss": 0.7909, "step": 1014 }, { "epoch": 0.15, "grad_norm": 6.677172660827637, "learning_rate": 1.9995619862406546e-06, "loss": 0.8493, "step": 1015 }, { "epoch": 0.15, "grad_norm": 5.903087615966797, "learning_rate": 1.9995572894223555e-06, "loss": 0.7275, "step": 1016 }, { "epoch": 0.15, "grad_norm": 6.724741458892822, "learning_rate": 1.9995525675617724e-06, "loss": 0.7156, "step": 1017 }, { "epoch": 0.15, "grad_norm": 6.8722124099731445, "learning_rate": 1.9995478206590246e-06, "loss": 0.8722, "step": 1018 }, { "epoch": 0.15, "grad_norm": 7.378407001495361, "learning_rate": 1.999543048714229e-06, "loss": 0.8085, "step": 1019 }, { "epoch": 0.15, "grad_norm": 6.412281513214111, "learning_rate": 1.9995382517275073e-06, "loss": 0.8162, "step": 1020 }, { "epoch": 0.15, "grad_norm": 6.881969928741455, "learning_rate": 1.9995334296989785e-06, "loss": 0.7826, "step": 1021 }, { "epoch": 0.15, "grad_norm": 7.1379876136779785, "learning_rate": 1.9995285826287634e-06, "loss": 0.8067, "step": 1022 }, { "epoch": 0.15, "grad_norm": 6.91831111907959, "learning_rate": 1.9995237105169843e-06, "loss": 0.7944, "step": 1023 }, { "epoch": 0.15, "grad_norm": 6.427585601806641, "learning_rate": 1.9995188133637626e-06, "loss": 0.6319, "step": 1024 }, { "epoch": 0.15, "grad_norm": 6.475371837615967, "learning_rate": 1.999513891169221e-06, "loss": 0.751, "step": 1025 }, { "epoch": 0.15, "grad_norm": 6.558658599853516, "learning_rate": 1.9995089439334824e-06, "loss": 0.8098, "step": 1026 }, { "epoch": 0.15, "grad_norm": 7.29237174987793, "learning_rate": 1.999503971656672e-06, "loss": 0.7282, "step": 1027 }, { "epoch": 0.15, "grad_norm": 6.346829414367676, "learning_rate": 1.999498974338913e-06, "loss": 0.7256, "step": 1028 }, { "epoch": 0.15, "grad_norm": 7.403438091278076, "learning_rate": 1.9994939519803312e-06, "loss": 0.8326, "step": 1029 }, { "epoch": 0.15, "grad_norm": 6.531954765319824, "learning_rate": 1.999488904581053e-06, "loss": 0.8067, "step": 1030 }, { "epoch": 0.15, "grad_norm": 7.174241542816162, "learning_rate": 1.9994838321412037e-06, "loss": 0.7755, "step": 1031 }, { "epoch": 0.15, "grad_norm": 6.979996681213379, "learning_rate": 1.9994787346609112e-06, "loss": 0.6885, "step": 1032 }, { "epoch": 0.15, "grad_norm": 6.9339470863342285, "learning_rate": 1.9994736121403035e-06, "loss": 0.7773, "step": 1033 }, { "epoch": 0.15, "grad_norm": 6.549981594085693, "learning_rate": 1.9994684645795074e-06, "loss": 0.7104, "step": 1034 }, { "epoch": 0.15, "grad_norm": 7.176489353179932, "learning_rate": 1.9994632919786534e-06, "loss": 0.8253, "step": 1035 }, { "epoch": 0.15, "grad_norm": 6.913437843322754, "learning_rate": 1.9994580943378708e-06, "loss": 0.8371, "step": 1036 }, { "epoch": 0.15, "grad_norm": 6.669191837310791, "learning_rate": 1.9994528716572894e-06, "loss": 0.7669, "step": 1037 }, { "epoch": 0.15, "grad_norm": 7.541450500488281, "learning_rate": 1.9994476239370403e-06, "loss": 0.7666, "step": 1038 }, { "epoch": 0.15, "grad_norm": 6.560755729675293, "learning_rate": 1.999442351177255e-06, "loss": 0.7811, "step": 1039 }, { "epoch": 0.15, "grad_norm": 6.100184440612793, "learning_rate": 1.9994370533780654e-06, "loss": 0.76, "step": 1040 }, { "epoch": 0.15, "grad_norm": 8.012386322021484, "learning_rate": 1.9994317305396044e-06, "loss": 0.7542, "step": 1041 }, { "epoch": 0.15, "grad_norm": 7.041454315185547, "learning_rate": 1.999426382662005e-06, "loss": 0.8457, "step": 1042 }, { "epoch": 0.15, "grad_norm": 6.163348197937012, "learning_rate": 1.9994210097454014e-06, "loss": 0.7884, "step": 1043 }, { "epoch": 0.15, "grad_norm": 6.768790245056152, "learning_rate": 1.999415611789929e-06, "loss": 0.767, "step": 1044 }, { "epoch": 0.15, "grad_norm": 6.252896308898926, "learning_rate": 1.9994101887957218e-06, "loss": 0.8202, "step": 1045 }, { "epoch": 0.15, "grad_norm": 6.1085381507873535, "learning_rate": 1.9994047407629163e-06, "loss": 0.786, "step": 1046 }, { "epoch": 0.15, "grad_norm": 6.669069766998291, "learning_rate": 1.9993992676916493e-06, "loss": 0.8269, "step": 1047 }, { "epoch": 0.15, "grad_norm": 7.6285834312438965, "learning_rate": 1.9993937695820565e-06, "loss": 0.8248, "step": 1048 }, { "epoch": 0.15, "grad_norm": 6.371428966522217, "learning_rate": 1.9993882464342774e-06, "loss": 0.8204, "step": 1049 }, { "epoch": 0.15, "grad_norm": 6.5974297523498535, "learning_rate": 1.9993826982484496e-06, "loss": 0.7268, "step": 1050 }, { "epoch": 0.15, "grad_norm": 6.600338935852051, "learning_rate": 1.999377125024712e-06, "loss": 0.7556, "step": 1051 }, { "epoch": 0.15, "grad_norm": 6.782146453857422, "learning_rate": 1.9993715267632043e-06, "loss": 0.7345, "step": 1052 }, { "epoch": 0.15, "grad_norm": 7.208761215209961, "learning_rate": 1.9993659034640664e-06, "loss": 0.6498, "step": 1053 }, { "epoch": 0.15, "grad_norm": 7.075248718261719, "learning_rate": 1.9993602551274406e-06, "loss": 0.7831, "step": 1054 }, { "epoch": 0.15, "grad_norm": 7.3190741539001465, "learning_rate": 1.9993545817534665e-06, "loss": 0.7236, "step": 1055 }, { "epoch": 0.15, "grad_norm": 6.530895233154297, "learning_rate": 1.9993488833422878e-06, "loss": 0.6727, "step": 1056 }, { "epoch": 0.15, "grad_norm": 6.763302326202393, "learning_rate": 1.999343159894046e-06, "loss": 0.7025, "step": 1057 }, { "epoch": 0.15, "grad_norm": 6.962917327880859, "learning_rate": 1.999337411408885e-06, "loss": 0.7443, "step": 1058 }, { "epoch": 0.15, "grad_norm": 6.3945746421813965, "learning_rate": 1.9993316378869498e-06, "loss": 0.7588, "step": 1059 }, { "epoch": 0.15, "grad_norm": 6.936639308929443, "learning_rate": 1.9993258393283835e-06, "loss": 0.807, "step": 1060 }, { "epoch": 0.15, "grad_norm": 6.843479156494141, "learning_rate": 1.9993200157333323e-06, "loss": 0.7145, "step": 1061 }, { "epoch": 0.15, "grad_norm": 6.346628665924072, "learning_rate": 1.9993141671019416e-06, "loss": 0.7743, "step": 1062 }, { "epoch": 0.15, "grad_norm": 6.447005748748779, "learning_rate": 1.999308293434358e-06, "loss": 0.7346, "step": 1063 }, { "epoch": 0.15, "grad_norm": 7.375465393066406, "learning_rate": 1.999302394730729e-06, "loss": 0.7696, "step": 1064 }, { "epoch": 0.15, "grad_norm": 6.977060794830322, "learning_rate": 1.9992964709912024e-06, "loss": 0.7687, "step": 1065 }, { "epoch": 0.15, "grad_norm": 6.237585544586182, "learning_rate": 1.9992905222159263e-06, "loss": 0.7293, "step": 1066 }, { "epoch": 0.15, "grad_norm": 6.354983329772949, "learning_rate": 1.9992845484050494e-06, "loss": 0.723, "step": 1067 }, { "epoch": 0.15, "grad_norm": 6.596430778503418, "learning_rate": 1.9992785495587225e-06, "loss": 0.7707, "step": 1068 }, { "epoch": 0.16, "grad_norm": 8.053667068481445, "learning_rate": 1.999272525677095e-06, "loss": 0.8937, "step": 1069 }, { "epoch": 0.16, "grad_norm": 6.827588081359863, "learning_rate": 1.999266476760318e-06, "loss": 0.7702, "step": 1070 }, { "epoch": 0.16, "grad_norm": 6.727241516113281, "learning_rate": 1.9992604028085427e-06, "loss": 0.732, "step": 1071 }, { "epoch": 0.16, "grad_norm": 7.019636631011963, "learning_rate": 1.999254303821922e-06, "loss": 0.7863, "step": 1072 }, { "epoch": 0.16, "grad_norm": 7.485666275024414, "learning_rate": 1.999248179800608e-06, "loss": 0.7211, "step": 1073 }, { "epoch": 0.16, "grad_norm": 7.18720006942749, "learning_rate": 1.999242030744755e-06, "loss": 0.7491, "step": 1074 }, { "epoch": 0.16, "grad_norm": 6.6184401512146, "learning_rate": 1.9992358566545162e-06, "loss": 0.8174, "step": 1075 }, { "epoch": 0.16, "grad_norm": 6.361228942871094, "learning_rate": 1.9992296575300463e-06, "loss": 0.811, "step": 1076 }, { "epoch": 0.16, "grad_norm": 6.098464488983154, "learning_rate": 1.9992234333715015e-06, "loss": 0.6996, "step": 1077 }, { "epoch": 0.16, "grad_norm": 6.661244869232178, "learning_rate": 1.999217184179037e-06, "loss": 0.8076, "step": 1078 }, { "epoch": 0.16, "grad_norm": 6.73370885848999, "learning_rate": 1.999210909952809e-06, "loss": 0.869, "step": 1079 }, { "epoch": 0.16, "grad_norm": 6.7155914306640625, "learning_rate": 1.9992046106929757e-06, "loss": 0.8253, "step": 1080 }, { "epoch": 0.16, "grad_norm": 6.531632900238037, "learning_rate": 1.9991982863996943e-06, "loss": 0.6969, "step": 1081 }, { "epoch": 0.16, "grad_norm": 7.491302490234375, "learning_rate": 1.9991919370731232e-06, "loss": 0.9197, "step": 1082 }, { "epoch": 0.16, "grad_norm": 6.464712619781494, "learning_rate": 1.999185562713422e-06, "loss": 0.7333, "step": 1083 }, { "epoch": 0.16, "grad_norm": 7.1281232833862305, "learning_rate": 1.99917916332075e-06, "loss": 0.8266, "step": 1084 }, { "epoch": 0.16, "grad_norm": 7.115824222564697, "learning_rate": 1.999172738895267e-06, "loss": 0.8829, "step": 1085 }, { "epoch": 0.16, "grad_norm": 7.447447776794434, "learning_rate": 1.999166289437135e-06, "loss": 0.8104, "step": 1086 }, { "epoch": 0.16, "grad_norm": 6.712973594665527, "learning_rate": 1.9991598149465152e-06, "loss": 0.7566, "step": 1087 }, { "epoch": 0.16, "grad_norm": 6.271355628967285, "learning_rate": 1.9991533154235694e-06, "loss": 0.8042, "step": 1088 }, { "epoch": 0.16, "grad_norm": 6.972302436828613, "learning_rate": 1.999146790868461e-06, "loss": 0.7404, "step": 1089 }, { "epoch": 0.16, "grad_norm": 6.717840194702148, "learning_rate": 1.999140241281353e-06, "loss": 0.909, "step": 1090 }, { "epoch": 0.16, "grad_norm": 9.354546546936035, "learning_rate": 1.9991336666624094e-06, "loss": 0.7556, "step": 1091 }, { "epoch": 0.16, "grad_norm": 6.363050937652588, "learning_rate": 1.999127067011796e-06, "loss": 0.754, "step": 1092 }, { "epoch": 0.16, "grad_norm": 7.342151641845703, "learning_rate": 1.9991204423296765e-06, "loss": 0.9172, "step": 1093 }, { "epoch": 0.16, "grad_norm": 7.046883583068848, "learning_rate": 1.999113792616218e-06, "loss": 0.7917, "step": 1094 }, { "epoch": 0.16, "grad_norm": 6.914419651031494, "learning_rate": 1.9991071178715867e-06, "loss": 0.7034, "step": 1095 }, { "epoch": 0.16, "grad_norm": 6.934471130371094, "learning_rate": 1.9991004180959503e-06, "loss": 0.8284, "step": 1096 }, { "epoch": 0.16, "grad_norm": 7.400130271911621, "learning_rate": 1.999093693289476e-06, "loss": 0.7238, "step": 1097 }, { "epoch": 0.16, "grad_norm": 6.727570056915283, "learning_rate": 1.9990869434523328e-06, "loss": 0.7988, "step": 1098 }, { "epoch": 0.16, "grad_norm": 6.502447605133057, "learning_rate": 1.9990801685846896e-06, "loss": 0.7854, "step": 1099 }, { "epoch": 0.16, "grad_norm": 7.068329811096191, "learning_rate": 1.999073368686716e-06, "loss": 0.8136, "step": 1100 }, { "epoch": 0.16, "grad_norm": 7.071192264556885, "learning_rate": 1.9990665437585823e-06, "loss": 0.7731, "step": 1101 }, { "epoch": 0.16, "grad_norm": 6.966594696044922, "learning_rate": 1.99905969380046e-06, "loss": 0.8221, "step": 1102 }, { "epoch": 0.16, "grad_norm": 6.812247276306152, "learning_rate": 1.99905281881252e-06, "loss": 0.7099, "step": 1103 }, { "epoch": 0.16, "grad_norm": 6.326827049255371, "learning_rate": 1.999045918794935e-06, "loss": 0.7916, "step": 1104 }, { "epoch": 0.16, "grad_norm": 7.9471001625061035, "learning_rate": 1.9990389937478777e-06, "loss": 0.9171, "step": 1105 }, { "epoch": 0.16, "grad_norm": 5.969440460205078, "learning_rate": 1.999032043671522e-06, "loss": 0.7467, "step": 1106 }, { "epoch": 0.16, "grad_norm": 6.486499786376953, "learning_rate": 1.9990250685660418e-06, "loss": 0.7384, "step": 1107 }, { "epoch": 0.16, "grad_norm": 6.331212997436523, "learning_rate": 1.9990180684316113e-06, "loss": 0.7256, "step": 1108 }, { "epoch": 0.16, "grad_norm": 6.516169548034668, "learning_rate": 1.9990110432684063e-06, "loss": 0.7246, "step": 1109 }, { "epoch": 0.16, "grad_norm": 7.5490827560424805, "learning_rate": 1.9990039930766034e-06, "loss": 0.8197, "step": 1110 }, { "epoch": 0.16, "grad_norm": 6.433105945587158, "learning_rate": 1.9989969178563782e-06, "loss": 0.7247, "step": 1111 }, { "epoch": 0.16, "grad_norm": 6.118059158325195, "learning_rate": 1.998989817607909e-06, "loss": 0.7675, "step": 1112 }, { "epoch": 0.16, "grad_norm": 6.099983215332031, "learning_rate": 1.9989826923313726e-06, "loss": 0.6632, "step": 1113 }, { "epoch": 0.16, "grad_norm": 6.314545631408691, "learning_rate": 1.9989755420269478e-06, "loss": 0.7278, "step": 1114 }, { "epoch": 0.16, "grad_norm": 6.388054847717285, "learning_rate": 1.998968366694814e-06, "loss": 0.7579, "step": 1115 }, { "epoch": 0.16, "grad_norm": 6.24468994140625, "learning_rate": 1.9989611663351512e-06, "loss": 0.7675, "step": 1116 }, { "epoch": 0.16, "grad_norm": 6.70665168762207, "learning_rate": 1.9989539409481395e-06, "loss": 0.7852, "step": 1117 }, { "epoch": 0.16, "grad_norm": 6.759531497955322, "learning_rate": 1.99894669053396e-06, "loss": 0.7525, "step": 1118 }, { "epoch": 0.16, "grad_norm": 6.566083908081055, "learning_rate": 1.998939415092794e-06, "loss": 0.8102, "step": 1119 }, { "epoch": 0.16, "grad_norm": 7.468013286590576, "learning_rate": 1.9989321146248247e-06, "loss": 0.8438, "step": 1120 }, { "epoch": 0.16, "grad_norm": 7.226218223571777, "learning_rate": 1.9989247891302335e-06, "loss": 0.722, "step": 1121 }, { "epoch": 0.16, "grad_norm": 6.715876579284668, "learning_rate": 1.9989174386092056e-06, "loss": 0.7716, "step": 1122 }, { "epoch": 0.16, "grad_norm": 7.296370983123779, "learning_rate": 1.998910063061924e-06, "loss": 0.7376, "step": 1123 }, { "epoch": 0.16, "grad_norm": 6.110001087188721, "learning_rate": 1.998902662488574e-06, "loss": 0.6695, "step": 1124 }, { "epoch": 0.16, "grad_norm": 6.245277404785156, "learning_rate": 1.9988952368893402e-06, "loss": 0.8148, "step": 1125 }, { "epoch": 0.16, "grad_norm": 6.668587684631348, "learning_rate": 1.99888778626441e-06, "loss": 0.8412, "step": 1126 }, { "epoch": 0.16, "grad_norm": 6.64423942565918, "learning_rate": 1.998880310613969e-06, "loss": 0.7593, "step": 1127 }, { "epoch": 0.16, "grad_norm": 6.6529412269592285, "learning_rate": 1.998872809938205e-06, "loss": 0.7064, "step": 1128 }, { "epoch": 0.16, "grad_norm": 6.821640968322754, "learning_rate": 1.9988652842373055e-06, "loss": 0.7778, "step": 1129 }, { "epoch": 0.16, "grad_norm": 7.471836566925049, "learning_rate": 1.998857733511459e-06, "loss": 0.7964, "step": 1130 }, { "epoch": 0.16, "grad_norm": 6.220052242279053, "learning_rate": 1.9988501577608558e-06, "loss": 0.7194, "step": 1131 }, { "epoch": 0.16, "grad_norm": 7.016927242279053, "learning_rate": 1.9988425569856842e-06, "loss": 0.7226, "step": 1132 }, { "epoch": 0.16, "grad_norm": 6.5195417404174805, "learning_rate": 1.9988349311861355e-06, "loss": 0.7564, "step": 1133 }, { "epoch": 0.16, "grad_norm": 7.464997291564941, "learning_rate": 1.9988272803624006e-06, "loss": 0.8246, "step": 1134 }, { "epoch": 0.16, "grad_norm": 6.102750778198242, "learning_rate": 1.9988196045146714e-06, "loss": 0.762, "step": 1135 }, { "epoch": 0.16, "grad_norm": 9.303552627563477, "learning_rate": 1.9988119036431393e-06, "loss": 0.8077, "step": 1136 }, { "epoch": 0.16, "grad_norm": 7.009661674499512, "learning_rate": 1.9988041777479983e-06, "loss": 0.7003, "step": 1137 }, { "epoch": 0.17, "grad_norm": 7.035353183746338, "learning_rate": 1.998796426829441e-06, "loss": 0.8554, "step": 1138 }, { "epoch": 0.17, "grad_norm": 7.097693920135498, "learning_rate": 1.9987886508876625e-06, "loss": 0.9154, "step": 1139 }, { "epoch": 0.17, "grad_norm": 6.35434627532959, "learning_rate": 1.9987808499228573e-06, "loss": 0.7817, "step": 1140 }, { "epoch": 0.17, "grad_norm": 6.765339374542236, "learning_rate": 1.998773023935221e-06, "loss": 0.8225, "step": 1141 }, { "epoch": 0.17, "grad_norm": 6.764370441436768, "learning_rate": 1.9987651729249487e-06, "loss": 0.7899, "step": 1142 }, { "epoch": 0.17, "grad_norm": 6.9335761070251465, "learning_rate": 1.998757296892238e-06, "loss": 0.78, "step": 1143 }, { "epoch": 0.17, "grad_norm": 6.458379745483398, "learning_rate": 1.9987493958372863e-06, "loss": 0.7876, "step": 1144 }, { "epoch": 0.17, "grad_norm": 6.740540504455566, "learning_rate": 1.9987414697602913e-06, "loss": 0.8694, "step": 1145 }, { "epoch": 0.17, "grad_norm": 6.006959915161133, "learning_rate": 1.9987335186614513e-06, "loss": 0.7029, "step": 1146 }, { "epoch": 0.17, "grad_norm": 6.346452713012695, "learning_rate": 1.9987255425409658e-06, "loss": 0.7084, "step": 1147 }, { "epoch": 0.17, "grad_norm": 7.289219856262207, "learning_rate": 1.9987175413990347e-06, "loss": 0.8316, "step": 1148 }, { "epoch": 0.17, "grad_norm": 6.619409084320068, "learning_rate": 1.9987095152358584e-06, "loss": 0.7961, "step": 1149 }, { "epoch": 0.17, "grad_norm": 6.886651515960693, "learning_rate": 1.998701464051638e-06, "loss": 0.8283, "step": 1150 }, { "epoch": 0.17, "grad_norm": 6.818440914154053, "learning_rate": 1.998693387846575e-06, "loss": 0.7357, "step": 1151 }, { "epoch": 0.17, "grad_norm": 8.484580039978027, "learning_rate": 1.9986852866208716e-06, "loss": 0.7732, "step": 1152 }, { "epoch": 0.17, "grad_norm": 6.401737213134766, "learning_rate": 1.9986771603747314e-06, "loss": 0.8243, "step": 1153 }, { "epoch": 0.17, "grad_norm": 6.80466890335083, "learning_rate": 1.9986690091083577e-06, "loss": 0.8209, "step": 1154 }, { "epoch": 0.17, "grad_norm": 7.211543083190918, "learning_rate": 1.9986608328219542e-06, "loss": 0.719, "step": 1155 }, { "epoch": 0.17, "grad_norm": 6.354741096496582, "learning_rate": 1.9986526315157264e-06, "loss": 0.7686, "step": 1156 }, { "epoch": 0.17, "grad_norm": 6.634754180908203, "learning_rate": 1.99864440518988e-06, "loss": 0.6348, "step": 1157 }, { "epoch": 0.17, "grad_norm": 7.240102291107178, "learning_rate": 1.9986361538446197e-06, "loss": 0.7577, "step": 1158 }, { "epoch": 0.17, "grad_norm": 6.442803859710693, "learning_rate": 1.998627877480154e-06, "loss": 0.7782, "step": 1159 }, { "epoch": 0.17, "grad_norm": 6.989999294281006, "learning_rate": 1.998619576096689e-06, "loss": 0.8394, "step": 1160 }, { "epoch": 0.17, "grad_norm": 6.448018550872803, "learning_rate": 1.9986112496944335e-06, "loss": 0.8162, "step": 1161 }, { "epoch": 0.17, "grad_norm": 6.873409748077393, "learning_rate": 1.9986028982735954e-06, "loss": 0.8434, "step": 1162 }, { "epoch": 0.17, "grad_norm": 6.93314266204834, "learning_rate": 1.9985945218343844e-06, "loss": 0.7435, "step": 1163 }, { "epoch": 0.17, "grad_norm": 7.669681549072266, "learning_rate": 1.99858612037701e-06, "loss": 0.8228, "step": 1164 }, { "epoch": 0.17, "grad_norm": 7.525571346282959, "learning_rate": 1.998577693901683e-06, "loss": 0.7862, "step": 1165 }, { "epoch": 0.17, "grad_norm": 6.201175212860107, "learning_rate": 1.998569242408615e-06, "loss": 0.6952, "step": 1166 }, { "epoch": 0.17, "grad_norm": 6.340773582458496, "learning_rate": 1.998560765898016e-06, "loss": 0.7099, "step": 1167 }, { "epoch": 0.17, "grad_norm": 6.767529010772705, "learning_rate": 1.9985522643701003e-06, "loss": 0.7944, "step": 1168 }, { "epoch": 0.17, "grad_norm": 6.46281623840332, "learning_rate": 1.9985437378250797e-06, "loss": 0.8387, "step": 1169 }, { "epoch": 0.17, "grad_norm": 7.041189670562744, "learning_rate": 1.9985351862631684e-06, "loss": 0.7536, "step": 1170 }, { "epoch": 0.17, "grad_norm": 7.481879234313965, "learning_rate": 1.9985266096845804e-06, "loss": 0.8031, "step": 1171 }, { "epoch": 0.17, "grad_norm": 7.269918918609619, "learning_rate": 1.9985180080895306e-06, "loss": 0.8569, "step": 1172 }, { "epoch": 0.17, "grad_norm": 6.228784561157227, "learning_rate": 1.9985093814782344e-06, "loss": 0.7217, "step": 1173 }, { "epoch": 0.17, "grad_norm": 7.041831016540527, "learning_rate": 1.9985007298509086e-06, "loss": 0.7495, "step": 1174 }, { "epoch": 0.17, "grad_norm": 6.893660545349121, "learning_rate": 1.998492053207769e-06, "loss": 0.6656, "step": 1175 }, { "epoch": 0.17, "grad_norm": 6.50741720199585, "learning_rate": 1.9984833515490335e-06, "loss": 0.8163, "step": 1176 }, { "epoch": 0.17, "grad_norm": 6.768928050994873, "learning_rate": 1.9984746248749197e-06, "loss": 0.6902, "step": 1177 }, { "epoch": 0.17, "grad_norm": 6.146579742431641, "learning_rate": 1.998465873185647e-06, "loss": 0.8176, "step": 1178 }, { "epoch": 0.17, "grad_norm": 6.851118564605713, "learning_rate": 1.998457096481434e-06, "loss": 0.7466, "step": 1179 }, { "epoch": 0.17, "grad_norm": 6.6075663566589355, "learning_rate": 1.9984482947625006e-06, "loss": 0.7599, "step": 1180 }, { "epoch": 0.17, "grad_norm": 6.5358757972717285, "learning_rate": 1.9984394680290674e-06, "loss": 0.8644, "step": 1181 }, { "epoch": 0.17, "grad_norm": 6.712706565856934, "learning_rate": 1.998430616281356e-06, "loss": 0.7755, "step": 1182 }, { "epoch": 0.17, "grad_norm": 6.275291919708252, "learning_rate": 1.9984217395195874e-06, "loss": 0.8023, "step": 1183 }, { "epoch": 0.17, "grad_norm": 7.857508659362793, "learning_rate": 1.9984128377439847e-06, "loss": 0.8531, "step": 1184 }, { "epoch": 0.17, "grad_norm": 6.148416996002197, "learning_rate": 1.9984039109547702e-06, "loss": 0.8636, "step": 1185 }, { "epoch": 0.17, "grad_norm": 6.866551876068115, "learning_rate": 1.9983949591521684e-06, "loss": 0.7684, "step": 1186 }, { "epoch": 0.17, "grad_norm": 6.669180870056152, "learning_rate": 1.998385982336403e-06, "loss": 0.7888, "step": 1187 }, { "epoch": 0.17, "grad_norm": 5.981822967529297, "learning_rate": 1.998376980507699e-06, "loss": 0.7675, "step": 1188 }, { "epoch": 0.17, "grad_norm": 6.123929500579834, "learning_rate": 1.998367953666282e-06, "loss": 0.8364, "step": 1189 }, { "epoch": 0.17, "grad_norm": 6.53389310836792, "learning_rate": 1.9983589018123775e-06, "loss": 0.7682, "step": 1190 }, { "epoch": 0.17, "grad_norm": 6.140202045440674, "learning_rate": 1.9983498249462136e-06, "loss": 0.787, "step": 1191 }, { "epoch": 0.17, "grad_norm": 6.7856855392456055, "learning_rate": 1.9983407230680168e-06, "loss": 0.7435, "step": 1192 }, { "epoch": 0.17, "grad_norm": 6.770361423492432, "learning_rate": 1.9983315961780152e-06, "loss": 0.8012, "step": 1193 }, { "epoch": 0.17, "grad_norm": 6.608659744262695, "learning_rate": 1.9983224442764378e-06, "loss": 0.728, "step": 1194 }, { "epoch": 0.17, "grad_norm": 7.142661094665527, "learning_rate": 1.9983132673635135e-06, "loss": 0.7763, "step": 1195 }, { "epoch": 0.17, "grad_norm": 6.358465671539307, "learning_rate": 1.9983040654394727e-06, "loss": 0.7329, "step": 1196 }, { "epoch": 0.17, "grad_norm": 6.9781107902526855, "learning_rate": 1.998294838504545e-06, "loss": 0.8353, "step": 1197 }, { "epoch": 0.17, "grad_norm": 5.721875190734863, "learning_rate": 1.9982855865589626e-06, "loss": 0.6687, "step": 1198 }, { "epoch": 0.17, "grad_norm": 6.436611652374268, "learning_rate": 1.9982763096029565e-06, "loss": 0.7508, "step": 1199 }, { "epoch": 0.17, "grad_norm": 6.033949375152588, "learning_rate": 1.99826700763676e-06, "loss": 0.7334, "step": 1200 }, { "epoch": 0.17, "grad_norm": 6.872565746307373, "learning_rate": 1.9982576806606054e-06, "loss": 0.7322, "step": 1201 }, { "epoch": 0.17, "grad_norm": 7.724605083465576, "learning_rate": 1.9982483286747263e-06, "loss": 0.8, "step": 1202 }, { "epoch": 0.17, "grad_norm": 6.449865818023682, "learning_rate": 1.998238951679358e-06, "loss": 0.7647, "step": 1203 }, { "epoch": 0.17, "grad_norm": 7.592318534851074, "learning_rate": 1.9982295496747343e-06, "loss": 0.7134, "step": 1204 }, { "epoch": 0.17, "grad_norm": 6.048961639404297, "learning_rate": 1.998220122661091e-06, "loss": 0.7576, "step": 1205 }, { "epoch": 0.17, "grad_norm": 7.267712593078613, "learning_rate": 1.9982106706386647e-06, "loss": 0.8181, "step": 1206 }, { "epoch": 0.18, "grad_norm": 7.095776557922363, "learning_rate": 1.998201193607692e-06, "loss": 0.8178, "step": 1207 }, { "epoch": 0.18, "grad_norm": 6.422061920166016, "learning_rate": 1.99819169156841e-06, "loss": 0.7545, "step": 1208 }, { "epoch": 0.18, "grad_norm": 6.910534381866455, "learning_rate": 1.9981821645210573e-06, "loss": 0.766, "step": 1209 }, { "epoch": 0.18, "grad_norm": 6.246405601501465, "learning_rate": 1.9981726124658723e-06, "loss": 0.7784, "step": 1210 }, { "epoch": 0.18, "grad_norm": 7.024847030639648, "learning_rate": 1.9981630354030946e-06, "loss": 0.7617, "step": 1211 }, { "epoch": 0.18, "grad_norm": 6.40736198425293, "learning_rate": 1.9981534333329637e-06, "loss": 0.7029, "step": 1212 }, { "epoch": 0.18, "grad_norm": 6.448663711547852, "learning_rate": 1.9981438062557204e-06, "loss": 0.7829, "step": 1213 }, { "epoch": 0.18, "grad_norm": 6.53660774230957, "learning_rate": 1.998134154171606e-06, "loss": 0.6831, "step": 1214 }, { "epoch": 0.18, "grad_norm": 6.45297908782959, "learning_rate": 1.9981244770808617e-06, "loss": 0.7807, "step": 1215 }, { "epoch": 0.18, "grad_norm": 6.827112674713135, "learning_rate": 1.998114774983731e-06, "loss": 0.7223, "step": 1216 }, { "epoch": 0.18, "grad_norm": 6.145047664642334, "learning_rate": 1.9981050478804558e-06, "loss": 0.7645, "step": 1217 }, { "epoch": 0.18, "grad_norm": 7.091270446777344, "learning_rate": 1.998095295771281e-06, "loss": 0.8346, "step": 1218 }, { "epoch": 0.18, "grad_norm": 7.172962188720703, "learning_rate": 1.99808551865645e-06, "loss": 0.7785, "step": 1219 }, { "epoch": 0.18, "grad_norm": 7.367718696594238, "learning_rate": 1.998075716536208e-06, "loss": 0.8015, "step": 1220 }, { "epoch": 0.18, "grad_norm": 7.330017566680908, "learning_rate": 1.9980658894108e-06, "loss": 0.795, "step": 1221 }, { "epoch": 0.18, "grad_norm": 6.984414577484131, "learning_rate": 1.998056037280474e-06, "loss": 0.8078, "step": 1222 }, { "epoch": 0.18, "grad_norm": 6.618412971496582, "learning_rate": 1.998046160145475e-06, "loss": 0.7262, "step": 1223 }, { "epoch": 0.18, "grad_norm": 6.660092830657959, "learning_rate": 1.998036258006051e-06, "loss": 0.6447, "step": 1224 }, { "epoch": 0.18, "grad_norm": 6.425216197967529, "learning_rate": 1.9980263308624504e-06, "loss": 0.834, "step": 1225 }, { "epoch": 0.18, "grad_norm": 6.927285671234131, "learning_rate": 1.9980163787149217e-06, "loss": 0.7299, "step": 1226 }, { "epoch": 0.18, "grad_norm": 6.496787071228027, "learning_rate": 1.9980064015637144e-06, "loss": 0.7083, "step": 1227 }, { "epoch": 0.18, "grad_norm": 6.698246479034424, "learning_rate": 1.997996399409078e-06, "loss": 0.8271, "step": 1228 }, { "epoch": 0.18, "grad_norm": 6.547418594360352, "learning_rate": 1.9979863722512637e-06, "loss": 0.8387, "step": 1229 }, { "epoch": 0.18, "grad_norm": 6.887234210968018, "learning_rate": 1.997976320090522e-06, "loss": 0.7796, "step": 1230 }, { "epoch": 0.18, "grad_norm": 6.799234390258789, "learning_rate": 1.9979662429271054e-06, "loss": 0.7116, "step": 1231 }, { "epoch": 0.18, "grad_norm": 6.643988609313965, "learning_rate": 1.997956140761266e-06, "loss": 0.8367, "step": 1232 }, { "epoch": 0.18, "grad_norm": 6.783056735992432, "learning_rate": 1.997946013593257e-06, "loss": 0.7972, "step": 1233 }, { "epoch": 0.18, "grad_norm": 6.6343607902526855, "learning_rate": 1.997935861423332e-06, "loss": 0.681, "step": 1234 }, { "epoch": 0.18, "grad_norm": 7.593239784240723, "learning_rate": 1.9979256842517458e-06, "loss": 0.7551, "step": 1235 }, { "epoch": 0.18, "grad_norm": 8.097314834594727, "learning_rate": 1.9979154820787533e-06, "loss": 0.8696, "step": 1236 }, { "epoch": 0.18, "grad_norm": 6.8463215827941895, "learning_rate": 1.9979052549046093e-06, "loss": 0.7693, "step": 1237 }, { "epoch": 0.18, "grad_norm": 6.846463203430176, "learning_rate": 1.9978950027295705e-06, "loss": 0.8332, "step": 1238 }, { "epoch": 0.18, "grad_norm": 7.8948655128479, "learning_rate": 1.9978847255538943e-06, "loss": 0.7898, "step": 1239 }, { "epoch": 0.18, "grad_norm": 6.514195919036865, "learning_rate": 1.997874423377837e-06, "loss": 0.6851, "step": 1240 }, { "epoch": 0.18, "grad_norm": 6.279211521148682, "learning_rate": 1.997864096201658e-06, "loss": 0.7365, "step": 1241 }, { "epoch": 0.18, "grad_norm": 6.61387300491333, "learning_rate": 1.9978537440256154e-06, "loss": 0.7475, "step": 1242 }, { "epoch": 0.18, "grad_norm": 6.65685510635376, "learning_rate": 1.9978433668499685e-06, "loss": 0.7025, "step": 1243 }, { "epoch": 0.18, "grad_norm": 6.277799129486084, "learning_rate": 1.997832964674977e-06, "loss": 0.8276, "step": 1244 }, { "epoch": 0.18, "grad_norm": 6.89757776260376, "learning_rate": 1.9978225375009023e-06, "loss": 0.7503, "step": 1245 }, { "epoch": 0.18, "grad_norm": 6.08334493637085, "learning_rate": 1.9978120853280053e-06, "loss": 0.8012, "step": 1246 }, { "epoch": 0.18, "grad_norm": 7.165166854858398, "learning_rate": 1.997801608156547e-06, "loss": 0.7432, "step": 1247 }, { "epoch": 0.18, "grad_norm": 6.316115856170654, "learning_rate": 1.9977911059867917e-06, "loss": 0.7754, "step": 1248 }, { "epoch": 0.18, "grad_norm": 6.09346866607666, "learning_rate": 1.9977805788190006e-06, "loss": 0.7178, "step": 1249 }, { "epoch": 0.18, "grad_norm": 7.4101386070251465, "learning_rate": 1.997770026653439e-06, "loss": 0.7236, "step": 1250 }, { "epoch": 0.18, "grad_norm": 6.623048782348633, "learning_rate": 1.9977594494903706e-06, "loss": 0.8107, "step": 1251 }, { "epoch": 0.18, "grad_norm": 6.570058345794678, "learning_rate": 1.9977488473300597e-06, "loss": 0.7174, "step": 1252 }, { "epoch": 0.18, "grad_norm": 6.373749732971191, "learning_rate": 1.9977382201727732e-06, "loss": 0.6878, "step": 1253 }, { "epoch": 0.18, "grad_norm": 5.936563491821289, "learning_rate": 1.9977275680187766e-06, "loss": 0.7275, "step": 1254 }, { "epoch": 0.18, "grad_norm": 6.229773998260498, "learning_rate": 1.9977168908683367e-06, "loss": 0.7701, "step": 1255 }, { "epoch": 0.18, "grad_norm": 6.049717426300049, "learning_rate": 1.997706188721721e-06, "loss": 0.7191, "step": 1256 }, { "epoch": 0.18, "grad_norm": 7.042580604553223, "learning_rate": 1.9976954615791985e-06, "loss": 0.815, "step": 1257 }, { "epoch": 0.18, "grad_norm": 6.638627529144287, "learning_rate": 1.997684709441037e-06, "loss": 0.8462, "step": 1258 }, { "epoch": 0.18, "grad_norm": 6.556238651275635, "learning_rate": 1.9976739323075067e-06, "loss": 0.6868, "step": 1259 }, { "epoch": 0.18, "grad_norm": 7.336675643920898, "learning_rate": 1.9976631301788762e-06, "loss": 0.7294, "step": 1260 }, { "epoch": 0.18, "grad_norm": 5.886346340179443, "learning_rate": 1.9976523030554174e-06, "loss": 0.7597, "step": 1261 }, { "epoch": 0.18, "grad_norm": 6.687431335449219, "learning_rate": 1.997641450937401e-06, "loss": 0.7642, "step": 1262 }, { "epoch": 0.18, "grad_norm": 6.656355381011963, "learning_rate": 1.9976305738250994e-06, "loss": 0.7635, "step": 1263 }, { "epoch": 0.18, "grad_norm": 7.0863776206970215, "learning_rate": 1.997619671718784e-06, "loss": 0.7225, "step": 1264 }, { "epoch": 0.18, "grad_norm": 6.320061206817627, "learning_rate": 1.9976087446187297e-06, "loss": 0.723, "step": 1265 }, { "epoch": 0.18, "grad_norm": 6.720461368560791, "learning_rate": 1.997597792525209e-06, "loss": 0.8124, "step": 1266 }, { "epoch": 0.18, "grad_norm": 6.259535789489746, "learning_rate": 1.997586815438496e-06, "loss": 0.7889, "step": 1267 }, { "epoch": 0.18, "grad_norm": 6.100295543670654, "learning_rate": 1.9975758133588667e-06, "loss": 0.7141, "step": 1268 }, { "epoch": 0.18, "grad_norm": 6.357246398925781, "learning_rate": 1.997564786286596e-06, "loss": 0.7527, "step": 1269 }, { "epoch": 0.18, "grad_norm": 6.425607204437256, "learning_rate": 1.9975537342219606e-06, "loss": 0.7618, "step": 1270 }, { "epoch": 0.18, "grad_norm": 6.169266700744629, "learning_rate": 1.997542657165237e-06, "loss": 0.729, "step": 1271 }, { "epoch": 0.18, "grad_norm": 5.950891017913818, "learning_rate": 1.9975315551167035e-06, "loss": 0.7115, "step": 1272 }, { "epoch": 0.18, "grad_norm": 5.8451080322265625, "learning_rate": 1.9975204280766373e-06, "loss": 0.7375, "step": 1273 }, { "epoch": 0.18, "grad_norm": 6.086084842681885, "learning_rate": 1.9975092760453177e-06, "loss": 0.7358, "step": 1274 }, { "epoch": 0.18, "grad_norm": 6.574324131011963, "learning_rate": 1.997498099023024e-06, "loss": 0.7298, "step": 1275 }, { "epoch": 0.19, "grad_norm": 7.294814109802246, "learning_rate": 1.9974868970100362e-06, "loss": 0.8043, "step": 1276 }, { "epoch": 0.19, "grad_norm": 6.974045276641846, "learning_rate": 1.997475670006635e-06, "loss": 0.8626, "step": 1277 }, { "epoch": 0.19, "grad_norm": 8.1318941116333, "learning_rate": 1.9974644180131013e-06, "loss": 0.7815, "step": 1278 }, { "epoch": 0.19, "grad_norm": 5.935003280639648, "learning_rate": 1.9974531410297174e-06, "loss": 0.7592, "step": 1279 }, { "epoch": 0.19, "grad_norm": 5.594265460968018, "learning_rate": 1.997441839056766e-06, "loss": 0.7631, "step": 1280 }, { "epoch": 0.19, "grad_norm": 7.236416339874268, "learning_rate": 1.99743051209453e-06, "loss": 0.7733, "step": 1281 }, { "epoch": 0.19, "grad_norm": 6.39171028137207, "learning_rate": 1.9974191601432927e-06, "loss": 0.6715, "step": 1282 }, { "epoch": 0.19, "grad_norm": 6.231365203857422, "learning_rate": 1.9974077832033393e-06, "loss": 0.7861, "step": 1283 }, { "epoch": 0.19, "grad_norm": 6.951073169708252, "learning_rate": 1.9973963812749546e-06, "loss": 0.682, "step": 1284 }, { "epoch": 0.19, "grad_norm": 6.819732666015625, "learning_rate": 1.997384954358424e-06, "loss": 0.7712, "step": 1285 }, { "epoch": 0.19, "grad_norm": 6.928945064544678, "learning_rate": 1.9973735024540337e-06, "loss": 0.8295, "step": 1286 }, { "epoch": 0.19, "grad_norm": 6.032792091369629, "learning_rate": 1.9973620255620715e-06, "loss": 0.6719, "step": 1287 }, { "epoch": 0.19, "grad_norm": 6.768979072570801, "learning_rate": 1.997350523682824e-06, "loss": 0.8796, "step": 1288 }, { "epoch": 0.19, "grad_norm": 6.507811546325684, "learning_rate": 1.9973389968165795e-06, "loss": 0.7016, "step": 1289 }, { "epoch": 0.19, "grad_norm": 6.167751789093018, "learning_rate": 1.997327444963627e-06, "loss": 0.7133, "step": 1290 }, { "epoch": 0.19, "grad_norm": 6.195384979248047, "learning_rate": 1.9973158681242562e-06, "loss": 0.7308, "step": 1291 }, { "epoch": 0.19, "grad_norm": 6.277507781982422, "learning_rate": 1.9973042662987565e-06, "loss": 0.7104, "step": 1292 }, { "epoch": 0.19, "grad_norm": 7.2537360191345215, "learning_rate": 1.9972926394874188e-06, "loss": 0.8296, "step": 1293 }, { "epoch": 0.19, "grad_norm": 7.604310989379883, "learning_rate": 1.9972809876905344e-06, "loss": 0.7799, "step": 1294 }, { "epoch": 0.19, "grad_norm": 6.301861763000488, "learning_rate": 1.9972693109083956e-06, "loss": 0.765, "step": 1295 }, { "epoch": 0.19, "grad_norm": 6.390196800231934, "learning_rate": 1.997257609141294e-06, "loss": 0.7806, "step": 1296 }, { "epoch": 0.19, "grad_norm": 6.69530725479126, "learning_rate": 1.997245882389524e-06, "loss": 0.779, "step": 1297 }, { "epoch": 0.19, "grad_norm": 6.330497741699219, "learning_rate": 1.9972341306533784e-06, "loss": 0.7513, "step": 1298 }, { "epoch": 0.19, "grad_norm": 5.93858528137207, "learning_rate": 1.9972223539331523e-06, "loss": 0.7352, "step": 1299 }, { "epoch": 0.19, "grad_norm": 6.657297611236572, "learning_rate": 1.99721055222914e-06, "loss": 0.7424, "step": 1300 }, { "epoch": 0.19, "grad_norm": 7.531817436218262, "learning_rate": 1.9971987255416382e-06, "loss": 0.6581, "step": 1301 }, { "epoch": 0.19, "grad_norm": 7.122951984405518, "learning_rate": 1.9971868738709424e-06, "loss": 0.7435, "step": 1302 }, { "epoch": 0.19, "grad_norm": 6.482854843139648, "learning_rate": 1.9971749972173495e-06, "loss": 0.7939, "step": 1303 }, { "epoch": 0.19, "grad_norm": 6.426898002624512, "learning_rate": 1.9971630955811575e-06, "loss": 0.8484, "step": 1304 }, { "epoch": 0.19, "grad_norm": 6.523397922515869, "learning_rate": 1.997151168962664e-06, "loss": 0.7529, "step": 1305 }, { "epoch": 0.19, "grad_norm": 5.926137924194336, "learning_rate": 1.9971392173621686e-06, "loss": 0.7312, "step": 1306 }, { "epoch": 0.19, "grad_norm": 7.03116512298584, "learning_rate": 1.99712724077997e-06, "loss": 0.7814, "step": 1307 }, { "epoch": 0.19, "grad_norm": 6.380654335021973, "learning_rate": 1.997115239216369e-06, "loss": 0.7574, "step": 1308 }, { "epoch": 0.19, "grad_norm": 6.86665153503418, "learning_rate": 1.997103212671665e-06, "loss": 0.8038, "step": 1309 }, { "epoch": 0.19, "grad_norm": 6.647822380065918, "learning_rate": 1.9970911611461605e-06, "loss": 0.7498, "step": 1310 }, { "epoch": 0.19, "grad_norm": 5.719478607177734, "learning_rate": 1.9970790846401573e-06, "loss": 0.7514, "step": 1311 }, { "epoch": 0.19, "grad_norm": 6.427418231964111, "learning_rate": 1.9970669831539577e-06, "loss": 0.7012, "step": 1312 }, { "epoch": 0.19, "grad_norm": 6.332492351531982, "learning_rate": 1.997054856687865e-06, "loss": 0.7146, "step": 1313 }, { "epoch": 0.19, "grad_norm": 6.648989677429199, "learning_rate": 1.9970427052421827e-06, "loss": 0.8502, "step": 1314 }, { "epoch": 0.19, "grad_norm": 5.880488872528076, "learning_rate": 1.997030528817215e-06, "loss": 0.7527, "step": 1315 }, { "epoch": 0.19, "grad_norm": 7.031900882720947, "learning_rate": 1.997018327413268e-06, "loss": 0.8444, "step": 1316 }, { "epoch": 0.19, "grad_norm": 6.343196392059326, "learning_rate": 1.9970061010306466e-06, "loss": 0.8138, "step": 1317 }, { "epoch": 0.19, "grad_norm": 6.130157470703125, "learning_rate": 1.9969938496696576e-06, "loss": 0.7211, "step": 1318 }, { "epoch": 0.19, "grad_norm": 6.373656272888184, "learning_rate": 1.996981573330607e-06, "loss": 0.805, "step": 1319 }, { "epoch": 0.19, "grad_norm": 6.962229251861572, "learning_rate": 1.9969692720138037e-06, "loss": 0.9502, "step": 1320 }, { "epoch": 0.19, "grad_norm": 6.517467021942139, "learning_rate": 1.9969569457195546e-06, "loss": 0.7129, "step": 1321 }, { "epoch": 0.19, "grad_norm": 6.044241428375244, "learning_rate": 1.9969445944481694e-06, "loss": 0.8152, "step": 1322 }, { "epoch": 0.19, "grad_norm": 6.196855545043945, "learning_rate": 1.9969322181999575e-06, "loss": 0.6994, "step": 1323 }, { "epoch": 0.19, "grad_norm": 6.06008243560791, "learning_rate": 1.996919816975228e-06, "loss": 0.7892, "step": 1324 }, { "epoch": 0.19, "grad_norm": 6.586514472961426, "learning_rate": 1.996907390774293e-06, "loss": 0.7689, "step": 1325 }, { "epoch": 0.19, "grad_norm": 8.242253303527832, "learning_rate": 1.996894939597463e-06, "loss": 0.8373, "step": 1326 }, { "epoch": 0.19, "grad_norm": 6.591336727142334, "learning_rate": 1.99688246344505e-06, "loss": 0.6733, "step": 1327 }, { "epoch": 0.19, "grad_norm": 6.281993865966797, "learning_rate": 1.9968699623173667e-06, "loss": 0.7209, "step": 1328 }, { "epoch": 0.19, "grad_norm": 5.588451862335205, "learning_rate": 1.9968574362147263e-06, "loss": 0.7577, "step": 1329 }, { "epoch": 0.19, "grad_norm": 5.86771821975708, "learning_rate": 1.9968448851374423e-06, "loss": 0.755, "step": 1330 }, { "epoch": 0.19, "grad_norm": 6.062346935272217, "learning_rate": 1.9968323090858293e-06, "loss": 0.766, "step": 1331 }, { "epoch": 0.19, "grad_norm": 5.550882816314697, "learning_rate": 1.996819708060203e-06, "loss": 0.7149, "step": 1332 }, { "epoch": 0.19, "grad_norm": 6.0134382247924805, "learning_rate": 1.996807082060878e-06, "loss": 0.6918, "step": 1333 }, { "epoch": 0.19, "grad_norm": 6.403714656829834, "learning_rate": 1.9967944310881714e-06, "loss": 0.7539, "step": 1334 }, { "epoch": 0.19, "grad_norm": 6.358190059661865, "learning_rate": 1.9967817551424e-06, "loss": 0.7617, "step": 1335 }, { "epoch": 0.19, "grad_norm": 6.497697353363037, "learning_rate": 1.9967690542238817e-06, "loss": 0.6842, "step": 1336 }, { "epoch": 0.19, "grad_norm": 6.4063262939453125, "learning_rate": 1.9967563283329336e-06, "loss": 0.7003, "step": 1337 }, { "epoch": 0.19, "grad_norm": 6.360761642456055, "learning_rate": 1.9967435774698757e-06, "loss": 0.8182, "step": 1338 }, { "epoch": 0.19, "grad_norm": 6.922698020935059, "learning_rate": 1.996730801635027e-06, "loss": 0.7856, "step": 1339 }, { "epoch": 0.19, "grad_norm": 6.885452747344971, "learning_rate": 1.9967180008287073e-06, "loss": 0.7141, "step": 1340 }, { "epoch": 0.19, "grad_norm": 7.056413173675537, "learning_rate": 1.996705175051238e-06, "loss": 0.7689, "step": 1341 }, { "epoch": 0.19, "grad_norm": 5.961648941040039, "learning_rate": 1.99669232430294e-06, "loss": 0.8099, "step": 1342 }, { "epoch": 0.19, "grad_norm": 7.061945915222168, "learning_rate": 1.996679448584135e-06, "loss": 0.761, "step": 1343 }, { "epoch": 0.2, "grad_norm": 6.791521072387695, "learning_rate": 1.996666547895146e-06, "loss": 0.6429, "step": 1344 }, { "epoch": 0.2, "grad_norm": 7.197166919708252, "learning_rate": 1.996653622236296e-06, "loss": 0.7653, "step": 1345 }, { "epoch": 0.2, "grad_norm": 6.3064284324646, "learning_rate": 1.9966406716079087e-06, "loss": 0.7932, "step": 1346 }, { "epoch": 0.2, "grad_norm": 6.155755043029785, "learning_rate": 1.996627696010309e-06, "loss": 0.7105, "step": 1347 }, { "epoch": 0.2, "grad_norm": 6.333831787109375, "learning_rate": 1.9966146954438213e-06, "loss": 0.7795, "step": 1348 }, { "epoch": 0.2, "grad_norm": 6.931074142456055, "learning_rate": 1.996601669908772e-06, "loss": 0.6749, "step": 1349 }, { "epoch": 0.2, "grad_norm": 6.126534461975098, "learning_rate": 1.9965886194054874e-06, "loss": 0.7152, "step": 1350 }, { "epoch": 0.2, "grad_norm": 6.62559175491333, "learning_rate": 1.9965755439342943e-06, "loss": 0.8222, "step": 1351 }, { "epoch": 0.2, "grad_norm": 7.014811038970947, "learning_rate": 1.9965624434955197e-06, "loss": 0.814, "step": 1352 }, { "epoch": 0.2, "grad_norm": 7.2537078857421875, "learning_rate": 1.9965493180894927e-06, "loss": 0.7816, "step": 1353 }, { "epoch": 0.2, "grad_norm": 7.335968494415283, "learning_rate": 1.996536167716542e-06, "loss": 0.7561, "step": 1354 }, { "epoch": 0.2, "grad_norm": 6.719372272491455, "learning_rate": 1.996522992376996e-06, "loss": 0.8474, "step": 1355 }, { "epoch": 0.2, "grad_norm": 6.552814483642578, "learning_rate": 1.9965097920711866e-06, "loss": 0.7905, "step": 1356 }, { "epoch": 0.2, "grad_norm": 6.413376808166504, "learning_rate": 1.9964965667994427e-06, "loss": 0.693, "step": 1357 }, { "epoch": 0.2, "grad_norm": 6.490198612213135, "learning_rate": 1.9964833165620965e-06, "loss": 0.7933, "step": 1358 }, { "epoch": 0.2, "grad_norm": 6.665472030639648, "learning_rate": 1.9964700413594803e-06, "loss": 0.8015, "step": 1359 }, { "epoch": 0.2, "grad_norm": 7.503035068511963, "learning_rate": 1.996456741191926e-06, "loss": 0.8882, "step": 1360 }, { "epoch": 0.2, "grad_norm": 6.233697414398193, "learning_rate": 1.9964434160597675e-06, "loss": 0.749, "step": 1361 }, { "epoch": 0.2, "grad_norm": 6.63902473449707, "learning_rate": 1.9964300659633384e-06, "loss": 0.8184, "step": 1362 }, { "epoch": 0.2, "grad_norm": 6.541268825531006, "learning_rate": 1.9964166909029725e-06, "loss": 0.6997, "step": 1363 }, { "epoch": 0.2, "grad_norm": 6.248945236206055, "learning_rate": 1.9964032908790054e-06, "loss": 0.711, "step": 1364 }, { "epoch": 0.2, "grad_norm": 6.678894996643066, "learning_rate": 1.996389865891773e-06, "loss": 0.8677, "step": 1365 }, { "epoch": 0.2, "grad_norm": 8.070524215698242, "learning_rate": 1.9963764159416115e-06, "loss": 0.6949, "step": 1366 }, { "epoch": 0.2, "grad_norm": 7.025221824645996, "learning_rate": 1.996362941028858e-06, "loss": 0.7385, "step": 1367 }, { "epoch": 0.2, "grad_norm": 6.1325812339782715, "learning_rate": 1.99634944115385e-06, "loss": 0.8175, "step": 1368 }, { "epoch": 0.2, "grad_norm": 6.762223243713379, "learning_rate": 1.996335916316925e-06, "loss": 0.7236, "step": 1369 }, { "epoch": 0.2, "grad_norm": 7.401564121246338, "learning_rate": 1.9963223665184234e-06, "loss": 0.7611, "step": 1370 }, { "epoch": 0.2, "grad_norm": 7.312739372253418, "learning_rate": 1.996308791758683e-06, "loss": 0.7629, "step": 1371 }, { "epoch": 0.2, "grad_norm": 6.155926704406738, "learning_rate": 1.9962951920380447e-06, "loss": 0.7775, "step": 1372 }, { "epoch": 0.2, "grad_norm": 6.520792484283447, "learning_rate": 1.9962815673568495e-06, "loss": 0.7378, "step": 1373 }, { "epoch": 0.2, "grad_norm": 6.179468154907227, "learning_rate": 1.9962679177154386e-06, "loss": 0.6776, "step": 1374 }, { "epoch": 0.2, "grad_norm": 6.808004379272461, "learning_rate": 1.9962542431141538e-06, "loss": 0.7186, "step": 1375 }, { "epoch": 0.2, "grad_norm": 6.341739654541016, "learning_rate": 1.9962405435533372e-06, "loss": 0.8028, "step": 1376 }, { "epoch": 0.2, "grad_norm": 7.006252765655518, "learning_rate": 1.9962268190333325e-06, "loss": 0.677, "step": 1377 }, { "epoch": 0.2, "grad_norm": 6.948457717895508, "learning_rate": 1.9962130695544838e-06, "loss": 0.7871, "step": 1378 }, { "epoch": 0.2, "grad_norm": 6.207625389099121, "learning_rate": 1.9961992951171355e-06, "loss": 0.707, "step": 1379 }, { "epoch": 0.2, "grad_norm": 6.602672576904297, "learning_rate": 1.9961854957216323e-06, "loss": 0.7212, "step": 1380 }, { "epoch": 0.2, "grad_norm": 7.542318344116211, "learning_rate": 1.99617167136832e-06, "loss": 0.7791, "step": 1381 }, { "epoch": 0.2, "grad_norm": 6.452097415924072, "learning_rate": 1.9961578220575454e-06, "loss": 0.8732, "step": 1382 }, { "epoch": 0.2, "grad_norm": 6.9438066482543945, "learning_rate": 1.9961439477896546e-06, "loss": 0.6831, "step": 1383 }, { "epoch": 0.2, "grad_norm": 6.353983402252197, "learning_rate": 1.996130048564996e-06, "loss": 0.7854, "step": 1384 }, { "epoch": 0.2, "grad_norm": 6.733219146728516, "learning_rate": 1.996116124383918e-06, "loss": 0.7837, "step": 1385 }, { "epoch": 0.2, "grad_norm": 6.805755138397217, "learning_rate": 1.9961021752467684e-06, "loss": 0.7388, "step": 1386 }, { "epoch": 0.2, "grad_norm": 7.1090779304504395, "learning_rate": 1.996088201153898e-06, "loss": 0.8373, "step": 1387 }, { "epoch": 0.2, "grad_norm": 6.559101581573486, "learning_rate": 1.9960742021056556e-06, "loss": 0.7727, "step": 1388 }, { "epoch": 0.2, "grad_norm": 6.592065334320068, "learning_rate": 1.9960601781023927e-06, "loss": 0.7357, "step": 1389 }, { "epoch": 0.2, "grad_norm": 6.72956657409668, "learning_rate": 1.99604612914446e-06, "loss": 0.7628, "step": 1390 }, { "epoch": 0.2, "grad_norm": 7.153213977813721, "learning_rate": 1.9960320552322106e-06, "loss": 0.7231, "step": 1391 }, { "epoch": 0.2, "grad_norm": 7.252281665802002, "learning_rate": 1.9960179563659965e-06, "loss": 0.8271, "step": 1392 }, { "epoch": 0.2, "grad_norm": 6.173803329467773, "learning_rate": 1.9960038325461706e-06, "loss": 0.6956, "step": 1393 }, { "epoch": 0.2, "grad_norm": 6.722384929656982, "learning_rate": 1.995989683773087e-06, "loss": 0.837, "step": 1394 }, { "epoch": 0.2, "grad_norm": 6.412632465362549, "learning_rate": 1.9959755100471e-06, "loss": 0.7124, "step": 1395 }, { "epoch": 0.2, "grad_norm": 7.164414882659912, "learning_rate": 1.995961311368565e-06, "loss": 0.8142, "step": 1396 }, { "epoch": 0.2, "grad_norm": 6.150646686553955, "learning_rate": 1.9959470877378377e-06, "loss": 0.7731, "step": 1397 }, { "epoch": 0.2, "grad_norm": 5.652933597564697, "learning_rate": 1.9959328391552747e-06, "loss": 0.6783, "step": 1398 }, { "epoch": 0.2, "grad_norm": 6.962897300720215, "learning_rate": 1.9959185656212323e-06, "loss": 0.7685, "step": 1399 }, { "epoch": 0.2, "grad_norm": 7.545159339904785, "learning_rate": 1.9959042671360684e-06, "loss": 0.7484, "step": 1400 }, { "epoch": 0.2, "grad_norm": 6.243046760559082, "learning_rate": 1.995889943700141e-06, "loss": 0.7961, "step": 1401 }, { "epoch": 0.2, "grad_norm": 7.505379676818848, "learning_rate": 1.99587559531381e-06, "loss": 0.8695, "step": 1402 }, { "epoch": 0.2, "grad_norm": 6.637495517730713, "learning_rate": 1.9958612219774333e-06, "loss": 0.7859, "step": 1403 }, { "epoch": 0.2, "grad_norm": 6.333225250244141, "learning_rate": 1.995846823691372e-06, "loss": 0.8499, "step": 1404 }, { "epoch": 0.2, "grad_norm": 6.998693466186523, "learning_rate": 1.995832400455987e-06, "loss": 0.8321, "step": 1405 }, { "epoch": 0.2, "grad_norm": 6.755865097045898, "learning_rate": 1.995817952271639e-06, "loss": 0.7586, "step": 1406 }, { "epoch": 0.2, "grad_norm": 6.216586589813232, "learning_rate": 1.99580347913869e-06, "loss": 0.717, "step": 1407 }, { "epoch": 0.2, "grad_norm": 6.86007022857666, "learning_rate": 1.995788981057503e-06, "loss": 0.7298, "step": 1408 }, { "epoch": 0.2, "grad_norm": 6.210324287414551, "learning_rate": 1.995774458028441e-06, "loss": 0.7773, "step": 1409 }, { "epoch": 0.2, "grad_norm": 6.883613109588623, "learning_rate": 1.995759910051868e-06, "loss": 0.7952, "step": 1410 }, { "epoch": 0.2, "grad_norm": 6.412996768951416, "learning_rate": 1.9957453371281487e-06, "loss": 0.7405, "step": 1411 }, { "epoch": 0.2, "grad_norm": 6.175074100494385, "learning_rate": 1.9957307392576475e-06, "loss": 0.784, "step": 1412 }, { "epoch": 0.21, "grad_norm": 6.1865949630737305, "learning_rate": 1.9957161164407305e-06, "loss": 0.7302, "step": 1413 }, { "epoch": 0.21, "grad_norm": 5.933686256408691, "learning_rate": 1.9957014686777645e-06, "loss": 0.7001, "step": 1414 }, { "epoch": 0.21, "grad_norm": 6.02923583984375, "learning_rate": 1.9956867959691157e-06, "loss": 0.8045, "step": 1415 }, { "epoch": 0.21, "grad_norm": 6.650866985321045, "learning_rate": 1.995672098315152e-06, "loss": 0.7542, "step": 1416 }, { "epoch": 0.21, "grad_norm": 6.710185527801514, "learning_rate": 1.9956573757162416e-06, "loss": 0.9032, "step": 1417 }, { "epoch": 0.21, "grad_norm": 5.9411139488220215, "learning_rate": 1.9956426281727537e-06, "loss": 0.7449, "step": 1418 }, { "epoch": 0.21, "grad_norm": 6.500853061676025, "learning_rate": 1.9956278556850577e-06, "loss": 0.7796, "step": 1419 }, { "epoch": 0.21, "grad_norm": 6.112471103668213, "learning_rate": 1.995613058253523e-06, "loss": 0.7444, "step": 1420 }, { "epoch": 0.21, "grad_norm": 6.261082649230957, "learning_rate": 1.995598235878521e-06, "loss": 0.7507, "step": 1421 }, { "epoch": 0.21, "grad_norm": 5.955245494842529, "learning_rate": 1.995583388560423e-06, "loss": 0.8269, "step": 1422 }, { "epoch": 0.21, "grad_norm": 6.715225696563721, "learning_rate": 1.9955685162996008e-06, "loss": 0.8002, "step": 1423 }, { "epoch": 0.21, "grad_norm": 6.183969497680664, "learning_rate": 1.995553619096427e-06, "loss": 0.8208, "step": 1424 }, { "epoch": 0.21, "grad_norm": 5.790566921234131, "learning_rate": 1.9955386969512747e-06, "loss": 0.7577, "step": 1425 }, { "epoch": 0.21, "grad_norm": 5.975508213043213, "learning_rate": 1.9955237498645185e-06, "loss": 0.7403, "step": 1426 }, { "epoch": 0.21, "grad_norm": 6.617146968841553, "learning_rate": 1.9955087778365316e-06, "loss": 0.7468, "step": 1427 }, { "epoch": 0.21, "grad_norm": 6.24402379989624, "learning_rate": 1.99549378086769e-06, "loss": 0.8009, "step": 1428 }, { "epoch": 0.21, "grad_norm": 6.055834770202637, "learning_rate": 1.9954787589583695e-06, "loss": 0.6605, "step": 1429 }, { "epoch": 0.21, "grad_norm": 6.923086166381836, "learning_rate": 1.995463712108946e-06, "loss": 0.7189, "step": 1430 }, { "epoch": 0.21, "grad_norm": 7.980082035064697, "learning_rate": 1.9954486403197967e-06, "loss": 0.7394, "step": 1431 }, { "epoch": 0.21, "grad_norm": 6.53875207901001, "learning_rate": 1.995433543591299e-06, "loss": 0.7204, "step": 1432 }, { "epoch": 0.21, "grad_norm": 6.362068176269531, "learning_rate": 1.9954184219238315e-06, "loss": 0.7025, "step": 1433 }, { "epoch": 0.21, "grad_norm": 7.876392841339111, "learning_rate": 1.995403275317773e-06, "loss": 0.8199, "step": 1434 }, { "epoch": 0.21, "grad_norm": 5.7002787590026855, "learning_rate": 1.995388103773502e-06, "loss": 0.7227, "step": 1435 }, { "epoch": 0.21, "grad_norm": 7.2471232414245605, "learning_rate": 1.9953729072914e-06, "loss": 0.767, "step": 1436 }, { "epoch": 0.21, "grad_norm": 6.899710178375244, "learning_rate": 1.9953576858718473e-06, "loss": 0.8126, "step": 1437 }, { "epoch": 0.21, "grad_norm": 6.0599751472473145, "learning_rate": 1.995342439515225e-06, "loss": 0.6992, "step": 1438 }, { "epoch": 0.21, "grad_norm": 6.695732116699219, "learning_rate": 1.9953271682219146e-06, "loss": 0.8285, "step": 1439 }, { "epoch": 0.21, "grad_norm": 6.3569536209106445, "learning_rate": 1.9953118719922997e-06, "loss": 0.7532, "step": 1440 }, { "epoch": 0.21, "grad_norm": 6.56758451461792, "learning_rate": 1.9952965508267628e-06, "loss": 0.8335, "step": 1441 }, { "epoch": 0.21, "grad_norm": 6.222428321838379, "learning_rate": 1.995281204725688e-06, "loss": 0.6904, "step": 1442 }, { "epoch": 0.21, "grad_norm": 7.042893886566162, "learning_rate": 1.9952658336894605e-06, "loss": 0.7738, "step": 1443 }, { "epoch": 0.21, "grad_norm": 6.841026782989502, "learning_rate": 1.995250437718464e-06, "loss": 0.8278, "step": 1444 }, { "epoch": 0.21, "grad_norm": 7.169965744018555, "learning_rate": 1.9952350168130847e-06, "loss": 0.7626, "step": 1445 }, { "epoch": 0.21, "grad_norm": 6.216003894805908, "learning_rate": 1.9952195709737097e-06, "loss": 0.764, "step": 1446 }, { "epoch": 0.21, "grad_norm": 7.004225254058838, "learning_rate": 1.9952041002007252e-06, "loss": 0.8795, "step": 1447 }, { "epoch": 0.21, "grad_norm": 6.517050266265869, "learning_rate": 1.9951886044945187e-06, "loss": 0.7546, "step": 1448 }, { "epoch": 0.21, "grad_norm": 6.742801666259766, "learning_rate": 1.995173083855479e-06, "loss": 0.7629, "step": 1449 }, { "epoch": 0.21, "grad_norm": 6.408946514129639, "learning_rate": 1.995157538283995e-06, "loss": 0.7785, "step": 1450 }, { "epoch": 0.21, "grad_norm": 7.679185390472412, "learning_rate": 1.995141967780455e-06, "loss": 0.81, "step": 1451 }, { "epoch": 0.21, "grad_norm": 5.928426265716553, "learning_rate": 1.9951263723452503e-06, "loss": 0.7769, "step": 1452 }, { "epoch": 0.21, "grad_norm": 6.441939353942871, "learning_rate": 1.9951107519787715e-06, "loss": 0.734, "step": 1453 }, { "epoch": 0.21, "grad_norm": 6.075709819793701, "learning_rate": 1.9950951066814096e-06, "loss": 0.7397, "step": 1454 }, { "epoch": 0.21, "grad_norm": 6.986750602722168, "learning_rate": 1.9950794364535566e-06, "loss": 0.7765, "step": 1455 }, { "epoch": 0.21, "grad_norm": 6.6699137687683105, "learning_rate": 1.9950637412956054e-06, "loss": 0.7564, "step": 1456 }, { "epoch": 0.21, "grad_norm": 6.941072463989258, "learning_rate": 1.9950480212079487e-06, "loss": 0.826, "step": 1457 }, { "epoch": 0.21, "grad_norm": 6.839658260345459, "learning_rate": 1.9950322761909807e-06, "loss": 0.6974, "step": 1458 }, { "epoch": 0.21, "grad_norm": 5.840488433837891, "learning_rate": 1.9950165062450957e-06, "loss": 0.7569, "step": 1459 }, { "epoch": 0.21, "grad_norm": 6.910712242126465, "learning_rate": 1.9950007113706892e-06, "loss": 0.7791, "step": 1460 }, { "epoch": 0.21, "grad_norm": 9.694135665893555, "learning_rate": 1.994984891568156e-06, "loss": 0.7857, "step": 1461 }, { "epoch": 0.21, "grad_norm": 6.254488945007324, "learning_rate": 1.994969046837893e-06, "loss": 0.7307, "step": 1462 }, { "epoch": 0.21, "grad_norm": 6.6944756507873535, "learning_rate": 1.9949531771802976e-06, "loss": 0.7957, "step": 1463 }, { "epoch": 0.21, "grad_norm": 6.644168376922607, "learning_rate": 1.994937282595767e-06, "loss": 0.8065, "step": 1464 }, { "epoch": 0.21, "grad_norm": 6.61159086227417, "learning_rate": 1.9949213630846993e-06, "loss": 0.7392, "step": 1465 }, { "epoch": 0.21, "grad_norm": 6.727344036102295, "learning_rate": 1.9949054186474933e-06, "loss": 0.7648, "step": 1466 }, { "epoch": 0.21, "grad_norm": 7.378589153289795, "learning_rate": 1.9948894492845488e-06, "loss": 0.8189, "step": 1467 }, { "epoch": 0.21, "grad_norm": 7.7914886474609375, "learning_rate": 1.9948734549962655e-06, "loss": 0.7365, "step": 1468 }, { "epoch": 0.21, "grad_norm": 6.888306140899658, "learning_rate": 1.9948574357830443e-06, "loss": 0.717, "step": 1469 }, { "epoch": 0.21, "grad_norm": 6.4952311515808105, "learning_rate": 1.9948413916452865e-06, "loss": 0.7776, "step": 1470 }, { "epoch": 0.21, "grad_norm": 7.109120845794678, "learning_rate": 1.994825322583394e-06, "loss": 0.7209, "step": 1471 }, { "epoch": 0.21, "grad_norm": 7.045886516571045, "learning_rate": 1.99480922859777e-06, "loss": 0.7466, "step": 1472 }, { "epoch": 0.21, "grad_norm": 6.340660572052002, "learning_rate": 1.9947931096888168e-06, "loss": 0.7697, "step": 1473 }, { "epoch": 0.21, "grad_norm": 6.77673864364624, "learning_rate": 1.9947769658569383e-06, "loss": 0.7641, "step": 1474 }, { "epoch": 0.21, "grad_norm": 6.510668754577637, "learning_rate": 1.9947607971025396e-06, "loss": 0.8022, "step": 1475 }, { "epoch": 0.21, "grad_norm": 7.188275337219238, "learning_rate": 1.9947446034260252e-06, "loss": 0.8353, "step": 1476 }, { "epoch": 0.21, "grad_norm": 6.421457290649414, "learning_rate": 1.9947283848278013e-06, "loss": 0.7585, "step": 1477 }, { "epoch": 0.21, "grad_norm": 5.892510890960693, "learning_rate": 1.9947121413082743e-06, "loss": 0.7484, "step": 1478 }, { "epoch": 0.21, "grad_norm": 6.144556522369385, "learning_rate": 1.99469587286785e-06, "loss": 0.7124, "step": 1479 }, { "epoch": 0.21, "grad_norm": 6.104055881500244, "learning_rate": 1.9946795795069377e-06, "loss": 0.6997, "step": 1480 }, { "epoch": 0.21, "grad_norm": 6.653101444244385, "learning_rate": 1.994663261225944e-06, "loss": 0.8114, "step": 1481 }, { "epoch": 0.22, "grad_norm": 6.28624153137207, "learning_rate": 1.994646918025279e-06, "loss": 0.8319, "step": 1482 }, { "epoch": 0.22, "grad_norm": 6.2506608963012695, "learning_rate": 1.9946305499053513e-06, "loss": 0.7308, "step": 1483 }, { "epoch": 0.22, "grad_norm": 6.241829872131348, "learning_rate": 1.994614156866571e-06, "loss": 0.7434, "step": 1484 }, { "epoch": 0.22, "grad_norm": 6.7396159172058105, "learning_rate": 1.9945977389093496e-06, "loss": 0.7331, "step": 1485 }, { "epoch": 0.22, "grad_norm": 6.783832550048828, "learning_rate": 1.9945812960340976e-06, "loss": 0.7668, "step": 1486 }, { "epoch": 0.22, "grad_norm": 6.789594650268555, "learning_rate": 1.9945648282412276e-06, "loss": 0.8075, "step": 1487 }, { "epoch": 0.22, "grad_norm": 8.005682945251465, "learning_rate": 1.9945483355311513e-06, "loss": 0.9215, "step": 1488 }, { "epoch": 0.22, "grad_norm": 6.435446739196777, "learning_rate": 1.994531817904283e-06, "loss": 0.7948, "step": 1489 }, { "epoch": 0.22, "grad_norm": 7.3173956871032715, "learning_rate": 1.9945152753610357e-06, "loss": 0.8421, "step": 1490 }, { "epoch": 0.22, "grad_norm": 7.6508893966674805, "learning_rate": 1.994498707901824e-06, "loss": 0.8035, "step": 1491 }, { "epoch": 0.22, "grad_norm": 6.892241954803467, "learning_rate": 1.9944821155270626e-06, "loss": 0.761, "step": 1492 }, { "epoch": 0.22, "grad_norm": 6.851191520690918, "learning_rate": 1.994465498237168e-06, "loss": 0.6914, "step": 1493 }, { "epoch": 0.22, "grad_norm": 6.207319259643555, "learning_rate": 1.9944488560325567e-06, "loss": 0.83, "step": 1494 }, { "epoch": 0.22, "grad_norm": 5.785516738891602, "learning_rate": 1.9944321889136447e-06, "loss": 0.7587, "step": 1495 }, { "epoch": 0.22, "grad_norm": 6.011747360229492, "learning_rate": 1.99441549688085e-06, "loss": 0.6969, "step": 1496 }, { "epoch": 0.22, "grad_norm": 6.355217933654785, "learning_rate": 1.994398779934591e-06, "loss": 0.8099, "step": 1497 }, { "epoch": 0.22, "grad_norm": 6.271841526031494, "learning_rate": 1.994382038075286e-06, "loss": 0.6998, "step": 1498 }, { "epoch": 0.22, "grad_norm": 6.1845269203186035, "learning_rate": 1.994365271303355e-06, "loss": 0.746, "step": 1499 }, { "epoch": 0.22, "grad_norm": 5.79701566696167, "learning_rate": 1.9943484796192174e-06, "loss": 0.7476, "step": 1500 }, { "epoch": 0.22, "grad_norm": 6.072751998901367, "learning_rate": 1.9943316630232946e-06, "loss": 0.6539, "step": 1501 }, { "epoch": 0.22, "grad_norm": 7.051633834838867, "learning_rate": 1.994314821516008e-06, "loss": 0.8179, "step": 1502 }, { "epoch": 0.22, "grad_norm": 6.404724597930908, "learning_rate": 1.9942979550977785e-06, "loss": 0.7011, "step": 1503 }, { "epoch": 0.22, "grad_norm": 6.336585998535156, "learning_rate": 1.9942810637690294e-06, "loss": 0.7887, "step": 1504 }, { "epoch": 0.22, "grad_norm": 6.5498576164245605, "learning_rate": 1.994264147530184e-06, "loss": 0.8227, "step": 1505 }, { "epoch": 0.22, "grad_norm": 6.258854389190674, "learning_rate": 1.9942472063816657e-06, "loss": 0.8231, "step": 1506 }, { "epoch": 0.22, "grad_norm": 6.453028678894043, "learning_rate": 1.994230240323899e-06, "loss": 0.7455, "step": 1507 }, { "epoch": 0.22, "grad_norm": 6.353989601135254, "learning_rate": 1.9942132493573094e-06, "loss": 0.8183, "step": 1508 }, { "epoch": 0.22, "grad_norm": 6.09046745300293, "learning_rate": 1.9941962334823223e-06, "loss": 0.7995, "step": 1509 }, { "epoch": 0.22, "grad_norm": 6.758981227874756, "learning_rate": 1.9941791926993636e-06, "loss": 0.7803, "step": 1510 }, { "epoch": 0.22, "grad_norm": 6.729323387145996, "learning_rate": 1.994162127008861e-06, "loss": 0.8302, "step": 1511 }, { "epoch": 0.22, "grad_norm": 6.986282825469971, "learning_rate": 1.9941450364112413e-06, "loss": 0.8325, "step": 1512 }, { "epoch": 0.22, "grad_norm": 6.05620813369751, "learning_rate": 1.994127920906933e-06, "loss": 0.7286, "step": 1513 }, { "epoch": 0.22, "grad_norm": 6.497483730316162, "learning_rate": 1.9941107804963654e-06, "loss": 0.8047, "step": 1514 }, { "epoch": 0.22, "grad_norm": 6.372478008270264, "learning_rate": 1.9940936151799673e-06, "loss": 0.7153, "step": 1515 }, { "epoch": 0.22, "grad_norm": 6.1083455085754395, "learning_rate": 1.9940764249581685e-06, "loss": 0.7485, "step": 1516 }, { "epoch": 0.22, "grad_norm": 7.1516289710998535, "learning_rate": 1.9940592098314004e-06, "loss": 0.8068, "step": 1517 }, { "epoch": 0.22, "grad_norm": 6.772476673126221, "learning_rate": 1.994041969800094e-06, "loss": 0.7102, "step": 1518 }, { "epoch": 0.22, "grad_norm": 6.89580774307251, "learning_rate": 1.994024704864681e-06, "loss": 0.7273, "step": 1519 }, { "epoch": 0.22, "grad_norm": 6.825033664703369, "learning_rate": 1.994007415025594e-06, "loss": 0.7444, "step": 1520 }, { "epoch": 0.22, "grad_norm": 6.537186622619629, "learning_rate": 1.9939901002832673e-06, "loss": 0.7952, "step": 1521 }, { "epoch": 0.22, "grad_norm": 6.181982517242432, "learning_rate": 1.9939727606381328e-06, "loss": 0.7163, "step": 1522 }, { "epoch": 0.22, "grad_norm": 6.442049026489258, "learning_rate": 1.9939553960906256e-06, "loss": 0.668, "step": 1523 }, { "epoch": 0.22, "grad_norm": 6.525991439819336, "learning_rate": 1.9939380066411816e-06, "loss": 0.7169, "step": 1524 }, { "epoch": 0.22, "grad_norm": 6.95180082321167, "learning_rate": 1.9939205922902357e-06, "loss": 0.8129, "step": 1525 }, { "epoch": 0.22, "grad_norm": 5.89187479019165, "learning_rate": 1.993903153038224e-06, "loss": 0.6874, "step": 1526 }, { "epoch": 0.22, "grad_norm": 6.249383926391602, "learning_rate": 1.993885688885584e-06, "loss": 0.7224, "step": 1527 }, { "epoch": 0.22, "grad_norm": 6.510173797607422, "learning_rate": 1.993868199832753e-06, "loss": 0.8471, "step": 1528 }, { "epoch": 0.22, "grad_norm": 5.987766742706299, "learning_rate": 1.9938506858801687e-06, "loss": 0.743, "step": 1529 }, { "epoch": 0.22, "grad_norm": 6.379124641418457, "learning_rate": 1.993833147028271e-06, "loss": 0.7414, "step": 1530 }, { "epoch": 0.22, "grad_norm": 6.8753485679626465, "learning_rate": 1.993815583277498e-06, "loss": 0.7802, "step": 1531 }, { "epoch": 0.22, "grad_norm": 6.211835861206055, "learning_rate": 1.9937979946282905e-06, "loss": 0.7964, "step": 1532 }, { "epoch": 0.22, "grad_norm": 6.052674293518066, "learning_rate": 1.9937803810810887e-06, "loss": 0.7576, "step": 1533 }, { "epoch": 0.22, "grad_norm": 7.324676513671875, "learning_rate": 1.993762742636335e-06, "loss": 0.848, "step": 1534 }, { "epoch": 0.22, "grad_norm": 6.227332592010498, "learning_rate": 1.9937450792944695e-06, "loss": 0.7096, "step": 1535 }, { "epoch": 0.22, "grad_norm": 6.042738914489746, "learning_rate": 1.9937273910559364e-06, "loss": 0.7468, "step": 1536 }, { "epoch": 0.22, "grad_norm": 5.976377010345459, "learning_rate": 1.993709677921178e-06, "loss": 0.7397, "step": 1537 }, { "epoch": 0.22, "grad_norm": 7.1994805335998535, "learning_rate": 1.9936919398906384e-06, "loss": 0.8306, "step": 1538 }, { "epoch": 0.22, "grad_norm": 6.836921691894531, "learning_rate": 1.993674176964761e-06, "loss": 0.691, "step": 1539 }, { "epoch": 0.22, "grad_norm": 6.675660610198975, "learning_rate": 1.993656389143993e-06, "loss": 0.7644, "step": 1540 }, { "epoch": 0.22, "grad_norm": 6.082278251647949, "learning_rate": 1.9936385764287774e-06, "loss": 0.702, "step": 1541 }, { "epoch": 0.22, "grad_norm": 5.9145379066467285, "learning_rate": 1.9936207388195623e-06, "loss": 0.6728, "step": 1542 }, { "epoch": 0.22, "grad_norm": 6.665125846862793, "learning_rate": 1.9936028763167943e-06, "loss": 0.8422, "step": 1543 }, { "epoch": 0.22, "grad_norm": 6.828614234924316, "learning_rate": 1.9935849889209205e-06, "loss": 0.6806, "step": 1544 }, { "epoch": 0.22, "grad_norm": 6.238155364990234, "learning_rate": 1.9935670766323887e-06, "loss": 0.7071, "step": 1545 }, { "epoch": 0.22, "grad_norm": 6.791354179382324, "learning_rate": 1.993549139451649e-06, "loss": 0.8383, "step": 1546 }, { "epoch": 0.22, "grad_norm": 6.738759517669678, "learning_rate": 1.9935311773791493e-06, "loss": 0.7435, "step": 1547 }, { "epoch": 0.22, "grad_norm": 6.138204097747803, "learning_rate": 1.9935131904153407e-06, "loss": 0.7878, "step": 1548 }, { "epoch": 0.22, "grad_norm": 6.423949718475342, "learning_rate": 1.9934951785606733e-06, "loss": 0.8238, "step": 1549 }, { "epoch": 0.22, "grad_norm": 6.412424564361572, "learning_rate": 1.9934771418155983e-06, "loss": 0.8104, "step": 1550 }, { "epoch": 0.23, "grad_norm": 6.87380838394165, "learning_rate": 1.993459080180568e-06, "loss": 0.79, "step": 1551 }, { "epoch": 0.23, "grad_norm": 6.280651092529297, "learning_rate": 1.9934409936560344e-06, "loss": 0.859, "step": 1552 }, { "epoch": 0.23, "grad_norm": 6.617197036743164, "learning_rate": 1.993422882242451e-06, "loss": 0.8229, "step": 1553 }, { "epoch": 0.23, "grad_norm": 6.170071601867676, "learning_rate": 1.9934047459402714e-06, "loss": 0.717, "step": 1554 }, { "epoch": 0.23, "grad_norm": 6.440677165985107, "learning_rate": 1.9933865847499497e-06, "loss": 0.7811, "step": 1555 }, { "epoch": 0.23, "grad_norm": 5.938858985900879, "learning_rate": 1.9933683986719417e-06, "loss": 0.7315, "step": 1556 }, { "epoch": 0.23, "grad_norm": 5.881729602813721, "learning_rate": 1.993350187706702e-06, "loss": 0.7594, "step": 1557 }, { "epoch": 0.23, "grad_norm": 6.516158580780029, "learning_rate": 1.993331951854688e-06, "loss": 0.8119, "step": 1558 }, { "epoch": 0.23, "grad_norm": 6.679920673370361, "learning_rate": 1.993313691116356e-06, "loss": 0.7704, "step": 1559 }, { "epoch": 0.23, "grad_norm": 6.8006911277771, "learning_rate": 1.9932954054921628e-06, "loss": 0.7889, "step": 1560 }, { "epoch": 0.23, "grad_norm": 6.491825103759766, "learning_rate": 1.9932770949825673e-06, "loss": 0.8037, "step": 1561 }, { "epoch": 0.23, "grad_norm": 6.885862827301025, "learning_rate": 1.9932587595880285e-06, "loss": 0.6903, "step": 1562 }, { "epoch": 0.23, "grad_norm": 6.846004962921143, "learning_rate": 1.9932403993090048e-06, "loss": 0.788, "step": 1563 }, { "epoch": 0.23, "grad_norm": 7.09016752243042, "learning_rate": 1.993222014145957e-06, "loss": 0.7279, "step": 1564 }, { "epoch": 0.23, "grad_norm": 6.2021660804748535, "learning_rate": 1.9932036040993453e-06, "loss": 0.7622, "step": 1565 }, { "epoch": 0.23, "grad_norm": 6.646659851074219, "learning_rate": 1.9931851691696316e-06, "loss": 0.7528, "step": 1566 }, { "epoch": 0.23, "grad_norm": 6.3010783195495605, "learning_rate": 1.993166709357277e-06, "loss": 0.7924, "step": 1567 }, { "epoch": 0.23, "grad_norm": 7.4296674728393555, "learning_rate": 1.9931482246627436e-06, "loss": 0.7488, "step": 1568 }, { "epoch": 0.23, "grad_norm": 5.8960442543029785, "learning_rate": 1.993129715086496e-06, "loss": 0.8006, "step": 1569 }, { "epoch": 0.23, "grad_norm": 6.5313801765441895, "learning_rate": 1.9931111806289964e-06, "loss": 0.7211, "step": 1570 }, { "epoch": 0.23, "grad_norm": 6.681556224822998, "learning_rate": 1.99309262129071e-06, "loss": 0.7802, "step": 1571 }, { "epoch": 0.23, "grad_norm": 7.57988977432251, "learning_rate": 1.9930740370721017e-06, "loss": 0.8685, "step": 1572 }, { "epoch": 0.23, "grad_norm": 6.409794330596924, "learning_rate": 1.993055427973637e-06, "loss": 0.6774, "step": 1573 }, { "epoch": 0.23, "grad_norm": 6.56201696395874, "learning_rate": 1.9930367939957823e-06, "loss": 0.8181, "step": 1574 }, { "epoch": 0.23, "grad_norm": 6.652411937713623, "learning_rate": 1.9930181351390037e-06, "loss": 0.6914, "step": 1575 }, { "epoch": 0.23, "grad_norm": 6.967761516571045, "learning_rate": 1.9929994514037696e-06, "loss": 0.7971, "step": 1576 }, { "epoch": 0.23, "grad_norm": 6.546811103820801, "learning_rate": 1.9929807427905477e-06, "loss": 0.7704, "step": 1577 }, { "epoch": 0.23, "grad_norm": 6.349756717681885, "learning_rate": 1.9929620092998064e-06, "loss": 0.8003, "step": 1578 }, { "epoch": 0.23, "grad_norm": 6.010904788970947, "learning_rate": 1.9929432509320158e-06, "loss": 0.8223, "step": 1579 }, { "epoch": 0.23, "grad_norm": 7.076045036315918, "learning_rate": 1.992924467687645e-06, "loss": 0.7355, "step": 1580 }, { "epoch": 0.23, "grad_norm": 7.051626205444336, "learning_rate": 1.992905659567165e-06, "loss": 0.7265, "step": 1581 }, { "epoch": 0.23, "grad_norm": 6.445630073547363, "learning_rate": 1.9928868265710475e-06, "loss": 0.7169, "step": 1582 }, { "epoch": 0.23, "grad_norm": 6.05335807800293, "learning_rate": 1.9928679686997634e-06, "loss": 0.7534, "step": 1583 }, { "epoch": 0.23, "grad_norm": 7.915823936462402, "learning_rate": 1.992849085953786e-06, "loss": 0.8087, "step": 1584 }, { "epoch": 0.23, "grad_norm": 6.1371750831604, "learning_rate": 1.9928301783335876e-06, "loss": 0.7236, "step": 1585 }, { "epoch": 0.23, "grad_norm": 6.736891269683838, "learning_rate": 1.992811245839642e-06, "loss": 0.7995, "step": 1586 }, { "epoch": 0.23, "grad_norm": 6.2000203132629395, "learning_rate": 1.9927922884724243e-06, "loss": 0.7274, "step": 1587 }, { "epoch": 0.23, "grad_norm": 7.27178955078125, "learning_rate": 1.9927733062324086e-06, "loss": 0.7183, "step": 1588 }, { "epoch": 0.23, "grad_norm": 6.496295928955078, "learning_rate": 1.992754299120071e-06, "loss": 0.7223, "step": 1589 }, { "epoch": 0.23, "grad_norm": 6.66440486907959, "learning_rate": 1.992735267135887e-06, "loss": 0.6138, "step": 1590 }, { "epoch": 0.23, "grad_norm": 5.805711269378662, "learning_rate": 1.9927162102803346e-06, "loss": 0.7222, "step": 1591 }, { "epoch": 0.23, "grad_norm": 5.6600341796875, "learning_rate": 1.9926971285538903e-06, "loss": 0.7374, "step": 1592 }, { "epoch": 0.23, "grad_norm": 6.717846393585205, "learning_rate": 1.9926780219570327e-06, "loss": 0.6956, "step": 1593 }, { "epoch": 0.23, "grad_norm": 6.072775840759277, "learning_rate": 1.9926588904902395e-06, "loss": 0.771, "step": 1594 }, { "epoch": 0.23, "grad_norm": 6.723759174346924, "learning_rate": 1.992639734153991e-06, "loss": 0.8288, "step": 1595 }, { "epoch": 0.23, "grad_norm": 6.273642539978027, "learning_rate": 1.9926205529487673e-06, "loss": 0.7493, "step": 1596 }, { "epoch": 0.23, "grad_norm": 6.353176593780518, "learning_rate": 1.992601346875048e-06, "loss": 0.7789, "step": 1597 }, { "epoch": 0.23, "grad_norm": 6.107110023498535, "learning_rate": 1.992582115933315e-06, "loss": 0.683, "step": 1598 }, { "epoch": 0.23, "grad_norm": 6.63279390335083, "learning_rate": 1.9925628601240494e-06, "loss": 0.8214, "step": 1599 }, { "epoch": 0.23, "grad_norm": 6.261913299560547, "learning_rate": 1.9925435794477346e-06, "loss": 0.7195, "step": 1600 }, { "epoch": 0.23, "grad_norm": 6.377159118652344, "learning_rate": 1.992524273904853e-06, "loss": 0.7948, "step": 1601 }, { "epoch": 0.23, "grad_norm": 6.973994731903076, "learning_rate": 1.992504943495888e-06, "loss": 0.8441, "step": 1602 }, { "epoch": 0.23, "grad_norm": 6.586246967315674, "learning_rate": 1.992485588221325e-06, "loss": 0.7708, "step": 1603 }, { "epoch": 0.23, "grad_norm": 6.191169738769531, "learning_rate": 1.992466208081648e-06, "loss": 0.7532, "step": 1604 }, { "epoch": 0.23, "grad_norm": 6.261021614074707, "learning_rate": 1.992446803077342e-06, "loss": 0.776, "step": 1605 }, { "epoch": 0.23, "grad_norm": 5.94381046295166, "learning_rate": 1.9924273732088947e-06, "loss": 0.7848, "step": 1606 }, { "epoch": 0.23, "grad_norm": 7.355563163757324, "learning_rate": 1.9924079184767918e-06, "loss": 0.7437, "step": 1607 }, { "epoch": 0.23, "grad_norm": 6.565474510192871, "learning_rate": 1.9923884388815207e-06, "loss": 0.7307, "step": 1608 }, { "epoch": 0.23, "grad_norm": 6.185554027557373, "learning_rate": 1.99236893442357e-06, "loss": 0.7793, "step": 1609 }, { "epoch": 0.23, "grad_norm": 6.345939636230469, "learning_rate": 1.9923494051034286e-06, "loss": 0.7345, "step": 1610 }, { "epoch": 0.23, "grad_norm": 6.717517852783203, "learning_rate": 1.9923298509215846e-06, "loss": 0.7502, "step": 1611 }, { "epoch": 0.23, "grad_norm": 7.292919158935547, "learning_rate": 1.9923102718785287e-06, "loss": 0.8765, "step": 1612 }, { "epoch": 0.23, "grad_norm": 6.158069610595703, "learning_rate": 1.9922906679747516e-06, "loss": 0.7649, "step": 1613 }, { "epoch": 0.23, "grad_norm": 6.112733840942383, "learning_rate": 1.992271039210744e-06, "loss": 0.6623, "step": 1614 }, { "epoch": 0.23, "grad_norm": 5.780876159667969, "learning_rate": 1.9922513855869973e-06, "loss": 0.7111, "step": 1615 }, { "epoch": 0.23, "grad_norm": 6.246610641479492, "learning_rate": 1.9922317071040047e-06, "loss": 0.8015, "step": 1616 }, { "epoch": 0.23, "grad_norm": 6.478570461273193, "learning_rate": 1.992212003762259e-06, "loss": 0.7762, "step": 1617 }, { "epoch": 0.23, "grad_norm": 6.798916339874268, "learning_rate": 1.9921922755622535e-06, "loss": 0.8504, "step": 1618 }, { "epoch": 0.23, "grad_norm": 6.641019821166992, "learning_rate": 1.992172522504483e-06, "loss": 0.7898, "step": 1619 }, { "epoch": 0.24, "grad_norm": 7.556062698364258, "learning_rate": 1.9921527445894416e-06, "loss": 0.7405, "step": 1620 }, { "epoch": 0.24, "grad_norm": 6.278707027435303, "learning_rate": 1.992132941817626e-06, "loss": 0.7164, "step": 1621 }, { "epoch": 0.24, "grad_norm": 5.470459938049316, "learning_rate": 1.9921131141895307e-06, "loss": 0.7476, "step": 1622 }, { "epoch": 0.24, "grad_norm": 7.784265041351318, "learning_rate": 1.992093261705654e-06, "loss": 0.7143, "step": 1623 }, { "epoch": 0.24, "grad_norm": 6.375666618347168, "learning_rate": 1.9920733843664925e-06, "loss": 0.695, "step": 1624 }, { "epoch": 0.24, "grad_norm": 6.612038612365723, "learning_rate": 1.992053482172544e-06, "loss": 0.7646, "step": 1625 }, { "epoch": 0.24, "grad_norm": 6.915318965911865, "learning_rate": 1.9920335551243076e-06, "loss": 0.711, "step": 1626 }, { "epoch": 0.24, "grad_norm": 6.029971122741699, "learning_rate": 1.9920136032222823e-06, "loss": 0.7859, "step": 1627 }, { "epoch": 0.24, "grad_norm": 6.168227672576904, "learning_rate": 1.9919936264669677e-06, "loss": 0.7294, "step": 1628 }, { "epoch": 0.24, "grad_norm": 5.776288032531738, "learning_rate": 1.9919736248588653e-06, "loss": 0.6868, "step": 1629 }, { "epoch": 0.24, "grad_norm": 5.962970733642578, "learning_rate": 1.9919535983984754e-06, "loss": 0.689, "step": 1630 }, { "epoch": 0.24, "grad_norm": 6.494171142578125, "learning_rate": 1.9919335470862993e-06, "loss": 0.6977, "step": 1631 }, { "epoch": 0.24, "grad_norm": 6.021823883056641, "learning_rate": 1.99191347092284e-06, "loss": 0.718, "step": 1632 }, { "epoch": 0.24, "grad_norm": 6.3875861167907715, "learning_rate": 1.9918933699086007e-06, "loss": 0.8374, "step": 1633 }, { "epoch": 0.24, "grad_norm": 6.63298225402832, "learning_rate": 1.991873244044085e-06, "loss": 0.7896, "step": 1634 }, { "epoch": 0.24, "grad_norm": 6.1836256980896, "learning_rate": 1.991853093329796e-06, "loss": 0.7661, "step": 1635 }, { "epoch": 0.24, "grad_norm": 6.28641939163208, "learning_rate": 1.99183291776624e-06, "loss": 0.7972, "step": 1636 }, { "epoch": 0.24, "grad_norm": 5.892693042755127, "learning_rate": 1.9918127173539213e-06, "loss": 0.7588, "step": 1637 }, { "epoch": 0.24, "grad_norm": 6.209299087524414, "learning_rate": 1.991792492093347e-06, "loss": 0.7856, "step": 1638 }, { "epoch": 0.24, "grad_norm": 6.3881754875183105, "learning_rate": 1.991772241985023e-06, "loss": 0.7603, "step": 1639 }, { "epoch": 0.24, "grad_norm": 6.600519180297852, "learning_rate": 1.9917519670294565e-06, "loss": 0.7808, "step": 1640 }, { "epoch": 0.24, "grad_norm": 6.527427673339844, "learning_rate": 1.9917316672271566e-06, "loss": 0.8026, "step": 1641 }, { "epoch": 0.24, "grad_norm": 6.96036434173584, "learning_rate": 1.9917113425786313e-06, "loss": 0.7921, "step": 1642 }, { "epoch": 0.24, "grad_norm": 6.2916741371154785, "learning_rate": 1.9916909930843892e-06, "loss": 0.7391, "step": 1643 }, { "epoch": 0.24, "grad_norm": 6.27748441696167, "learning_rate": 1.9916706187449408e-06, "loss": 0.787, "step": 1644 }, { "epoch": 0.24, "grad_norm": 6.013085842132568, "learning_rate": 1.9916502195607962e-06, "loss": 0.7281, "step": 1645 }, { "epoch": 0.24, "grad_norm": 7.040189743041992, "learning_rate": 1.9916297955324664e-06, "loss": 0.7927, "step": 1646 }, { "epoch": 0.24, "grad_norm": 6.3650736808776855, "learning_rate": 1.9916093466604637e-06, "loss": 0.7075, "step": 1647 }, { "epoch": 0.24, "grad_norm": 6.339793682098389, "learning_rate": 1.9915888729452998e-06, "loss": 0.7774, "step": 1648 }, { "epoch": 0.24, "grad_norm": 6.136661052703857, "learning_rate": 1.991568374387488e-06, "loss": 0.817, "step": 1649 }, { "epoch": 0.24, "grad_norm": 7.095183372497559, "learning_rate": 1.9915478509875414e-06, "loss": 0.7991, "step": 1650 }, { "epoch": 0.24, "grad_norm": 6.468672275543213, "learning_rate": 1.9915273027459748e-06, "loss": 0.7694, "step": 1651 }, { "epoch": 0.24, "grad_norm": 5.935378074645996, "learning_rate": 1.9915067296633027e-06, "loss": 0.7007, "step": 1652 }, { "epoch": 0.24, "grad_norm": 6.291462421417236, "learning_rate": 1.99148613174004e-06, "loss": 0.7009, "step": 1653 }, { "epoch": 0.24, "grad_norm": 5.762714385986328, "learning_rate": 1.991465508976704e-06, "loss": 0.7, "step": 1654 }, { "epoch": 0.24, "grad_norm": 6.758007049560547, "learning_rate": 1.9914448613738106e-06, "loss": 0.6949, "step": 1655 }, { "epoch": 0.24, "grad_norm": 6.528113842010498, "learning_rate": 1.9914241889318766e-06, "loss": 0.738, "step": 1656 }, { "epoch": 0.24, "grad_norm": 6.572569370269775, "learning_rate": 1.9914034916514206e-06, "loss": 0.7878, "step": 1657 }, { "epoch": 0.24, "grad_norm": 6.2466959953308105, "learning_rate": 1.991382769532961e-06, "loss": 0.7166, "step": 1658 }, { "epoch": 0.24, "grad_norm": 6.368842124938965, "learning_rate": 1.9913620225770173e-06, "loss": 0.7167, "step": 1659 }, { "epoch": 0.24, "grad_norm": 5.943832874298096, "learning_rate": 1.9913412507841084e-06, "loss": 0.7439, "step": 1660 }, { "epoch": 0.24, "grad_norm": 6.163771152496338, "learning_rate": 1.9913204541547556e-06, "loss": 0.6881, "step": 1661 }, { "epoch": 0.24, "grad_norm": 6.592225074768066, "learning_rate": 1.991299632689479e-06, "loss": 0.7798, "step": 1662 }, { "epoch": 0.24, "grad_norm": 6.188870906829834, "learning_rate": 1.9912787863888015e-06, "loss": 0.7609, "step": 1663 }, { "epoch": 0.24, "grad_norm": 6.2893476486206055, "learning_rate": 1.9912579152532444e-06, "loss": 0.7443, "step": 1664 }, { "epoch": 0.24, "grad_norm": 5.663012981414795, "learning_rate": 1.9912370192833306e-06, "loss": 0.7816, "step": 1665 }, { "epoch": 0.24, "grad_norm": 6.057720184326172, "learning_rate": 1.9912160984795844e-06, "loss": 0.7452, "step": 1666 }, { "epoch": 0.24, "grad_norm": 6.215766429901123, "learning_rate": 1.991195152842529e-06, "loss": 0.7847, "step": 1667 }, { "epoch": 0.24, "grad_norm": 6.609939098358154, "learning_rate": 1.9911741823726897e-06, "loss": 0.7814, "step": 1668 }, { "epoch": 0.24, "grad_norm": 6.532577991485596, "learning_rate": 1.9911531870705915e-06, "loss": 0.8648, "step": 1669 }, { "epoch": 0.24, "grad_norm": 6.5519890785217285, "learning_rate": 1.991132166936761e-06, "loss": 0.7967, "step": 1670 }, { "epoch": 0.24, "grad_norm": 7.124937057495117, "learning_rate": 1.991111121971724e-06, "loss": 0.8099, "step": 1671 }, { "epoch": 0.24, "grad_norm": 7.240155220031738, "learning_rate": 1.991090052176009e-06, "loss": 0.8155, "step": 1672 }, { "epoch": 0.24, "grad_norm": 6.264976501464844, "learning_rate": 1.9910689575501426e-06, "loss": 0.7406, "step": 1673 }, { "epoch": 0.24, "grad_norm": 5.994197368621826, "learning_rate": 1.991047838094654e-06, "loss": 0.7763, "step": 1674 }, { "epoch": 0.24, "grad_norm": 6.245902061462402, "learning_rate": 1.9910266938100716e-06, "loss": 0.717, "step": 1675 }, { "epoch": 0.24, "grad_norm": 6.696354389190674, "learning_rate": 1.9910055246969262e-06, "loss": 0.7255, "step": 1676 }, { "epoch": 0.24, "grad_norm": 6.034346580505371, "learning_rate": 1.990984330755747e-06, "loss": 0.7115, "step": 1677 }, { "epoch": 0.24, "grad_norm": 5.844946384429932, "learning_rate": 1.9909631119870663e-06, "loss": 0.7625, "step": 1678 }, { "epoch": 0.24, "grad_norm": 7.290412425994873, "learning_rate": 1.9909418683914145e-06, "loss": 0.7182, "step": 1679 }, { "epoch": 0.24, "grad_norm": 6.098972797393799, "learning_rate": 1.9909205999693248e-06, "loss": 0.7831, "step": 1680 }, { "epoch": 0.24, "grad_norm": 6.798252582550049, "learning_rate": 1.9908993067213287e-06, "loss": 0.7953, "step": 1681 }, { "epoch": 0.24, "grad_norm": 6.846597194671631, "learning_rate": 1.990877988647961e-06, "loss": 0.8711, "step": 1682 }, { "epoch": 0.24, "grad_norm": 6.84132719039917, "learning_rate": 1.9908566457497555e-06, "loss": 0.7649, "step": 1683 }, { "epoch": 0.24, "grad_norm": 6.127053260803223, "learning_rate": 1.9908352780272464e-06, "loss": 0.7782, "step": 1684 }, { "epoch": 0.24, "grad_norm": 6.910600185394287, "learning_rate": 1.9908138854809694e-06, "loss": 0.8955, "step": 1685 }, { "epoch": 0.24, "grad_norm": 6.421804904937744, "learning_rate": 1.9907924681114605e-06, "loss": 0.7416, "step": 1686 }, { "epoch": 0.24, "grad_norm": 6.232474327087402, "learning_rate": 1.9907710259192564e-06, "loss": 0.8606, "step": 1687 }, { "epoch": 0.24, "grad_norm": 7.292420387268066, "learning_rate": 1.9907495589048936e-06, "loss": 0.7783, "step": 1688 }, { "epoch": 0.25, "grad_norm": 7.265207290649414, "learning_rate": 1.9907280670689105e-06, "loss": 0.8073, "step": 1689 }, { "epoch": 0.25, "grad_norm": 5.956777572631836, "learning_rate": 1.9907065504118457e-06, "loss": 0.6754, "step": 1690 }, { "epoch": 0.25, "grad_norm": 7.273878574371338, "learning_rate": 1.9906850089342376e-06, "loss": 0.854, "step": 1691 }, { "epoch": 0.25, "grad_norm": 6.457524299621582, "learning_rate": 1.990663442636627e-06, "loss": 0.7227, "step": 1692 }, { "epoch": 0.25, "grad_norm": 6.899114608764648, "learning_rate": 1.990641851519553e-06, "loss": 0.798, "step": 1693 }, { "epoch": 0.25, "grad_norm": 7.35143518447876, "learning_rate": 1.990620235583557e-06, "loss": 0.6998, "step": 1694 }, { "epoch": 0.25, "grad_norm": 6.448607444763184, "learning_rate": 1.9905985948291807e-06, "loss": 0.755, "step": 1695 }, { "epoch": 0.25, "grad_norm": 6.532499313354492, "learning_rate": 1.990576929256966e-06, "loss": 0.7784, "step": 1696 }, { "epoch": 0.25, "grad_norm": 6.652240753173828, "learning_rate": 1.990555238867456e-06, "loss": 0.8063, "step": 1697 }, { "epoch": 0.25, "grad_norm": 6.9731125831604, "learning_rate": 1.990533523661194e-06, "loss": 0.764, "step": 1698 }, { "epoch": 0.25, "grad_norm": 6.494365692138672, "learning_rate": 1.990511783638724e-06, "loss": 0.8014, "step": 1699 }, { "epoch": 0.25, "grad_norm": 7.057163238525391, "learning_rate": 1.990490018800591e-06, "loss": 0.7102, "step": 1700 }, { "epoch": 0.25, "grad_norm": 6.609208106994629, "learning_rate": 1.9904682291473394e-06, "loss": 0.8376, "step": 1701 }, { "epoch": 0.25, "grad_norm": 6.182541370391846, "learning_rate": 1.9904464146795157e-06, "loss": 0.8194, "step": 1702 }, { "epoch": 0.25, "grad_norm": 6.149275779724121, "learning_rate": 1.9904245753976666e-06, "loss": 0.7964, "step": 1703 }, { "epoch": 0.25, "grad_norm": 6.311537742614746, "learning_rate": 1.9904027113023393e-06, "loss": 0.7659, "step": 1704 }, { "epoch": 0.25, "grad_norm": 6.824280738830566, "learning_rate": 1.990380822394081e-06, "loss": 0.7354, "step": 1705 }, { "epoch": 0.25, "grad_norm": 6.560774326324463, "learning_rate": 1.99035890867344e-06, "loss": 0.7481, "step": 1706 }, { "epoch": 0.25, "grad_norm": 6.393545150756836, "learning_rate": 1.990336970140966e-06, "loss": 0.7196, "step": 1707 }, { "epoch": 0.25, "grad_norm": 6.362223148345947, "learning_rate": 1.9903150067972086e-06, "loss": 0.7746, "step": 1708 }, { "epoch": 0.25, "grad_norm": 6.66926908493042, "learning_rate": 1.9902930186427177e-06, "loss": 0.7247, "step": 1709 }, { "epoch": 0.25, "grad_norm": 6.126853942871094, "learning_rate": 1.990271005678044e-06, "loss": 0.7492, "step": 1710 }, { "epoch": 0.25, "grad_norm": 7.033971309661865, "learning_rate": 1.9902489679037396e-06, "loss": 0.8627, "step": 1711 }, { "epoch": 0.25, "grad_norm": 6.681117057800293, "learning_rate": 1.990226905320356e-06, "loss": 0.7794, "step": 1712 }, { "epoch": 0.25, "grad_norm": 6.330780029296875, "learning_rate": 1.9902048179284464e-06, "loss": 0.8091, "step": 1713 }, { "epoch": 0.25, "grad_norm": 7.410407066345215, "learning_rate": 1.9901827057285642e-06, "loss": 0.6886, "step": 1714 }, { "epoch": 0.25, "grad_norm": 5.833026885986328, "learning_rate": 1.990160568721263e-06, "loss": 0.7309, "step": 1715 }, { "epoch": 0.25, "grad_norm": 6.192899703979492, "learning_rate": 1.9901384069070977e-06, "loss": 0.8044, "step": 1716 }, { "epoch": 0.25, "grad_norm": 6.429494857788086, "learning_rate": 1.9901162202866234e-06, "loss": 0.7494, "step": 1717 }, { "epoch": 0.25, "grad_norm": 5.714809894561768, "learning_rate": 1.990094008860396e-06, "loss": 0.7387, "step": 1718 }, { "epoch": 0.25, "grad_norm": 6.456145286560059, "learning_rate": 1.990071772628972e-06, "loss": 0.7546, "step": 1719 }, { "epoch": 0.25, "grad_norm": 7.038434982299805, "learning_rate": 1.990049511592908e-06, "loss": 0.9087, "step": 1720 }, { "epoch": 0.25, "grad_norm": 6.375280857086182, "learning_rate": 1.990027225752762e-06, "loss": 0.7513, "step": 1721 }, { "epoch": 0.25, "grad_norm": 6.01577615737915, "learning_rate": 1.9900049151090933e-06, "loss": 0.7217, "step": 1722 }, { "epoch": 0.25, "grad_norm": 7.126707553863525, "learning_rate": 1.98998257966246e-06, "loss": 0.7604, "step": 1723 }, { "epoch": 0.25, "grad_norm": 7.555356979370117, "learning_rate": 1.989960219413421e-06, "loss": 0.8052, "step": 1724 }, { "epoch": 0.25, "grad_norm": 6.36943244934082, "learning_rate": 1.9899378343625373e-06, "loss": 0.7576, "step": 1725 }, { "epoch": 0.25, "grad_norm": 6.082250595092773, "learning_rate": 1.9899154245103702e-06, "loss": 0.6749, "step": 1726 }, { "epoch": 0.25, "grad_norm": 6.39382791519165, "learning_rate": 1.98989298985748e-06, "loss": 0.7238, "step": 1727 }, { "epoch": 0.25, "grad_norm": 6.635709762573242, "learning_rate": 1.9898705304044297e-06, "loss": 0.7555, "step": 1728 }, { "epoch": 0.25, "grad_norm": 6.834378242492676, "learning_rate": 1.9898480461517815e-06, "loss": 0.7274, "step": 1729 }, { "epoch": 0.25, "grad_norm": 5.910187721252441, "learning_rate": 1.989825537100099e-06, "loss": 0.8057, "step": 1730 }, { "epoch": 0.25, "grad_norm": 6.026977062225342, "learning_rate": 1.989803003249946e-06, "loss": 0.7625, "step": 1731 }, { "epoch": 0.25, "grad_norm": 6.666296482086182, "learning_rate": 1.989780444601887e-06, "loss": 0.7886, "step": 1732 }, { "epoch": 0.25, "grad_norm": 5.660505294799805, "learning_rate": 1.9897578611564867e-06, "loss": 0.7191, "step": 1733 }, { "epoch": 0.25, "grad_norm": 6.774153709411621, "learning_rate": 1.9897352529143117e-06, "loss": 0.7808, "step": 1734 }, { "epoch": 0.25, "grad_norm": 6.938343524932861, "learning_rate": 1.989712619875928e-06, "loss": 0.9879, "step": 1735 }, { "epoch": 0.25, "grad_norm": 7.666864395141602, "learning_rate": 1.989689962041903e-06, "loss": 0.8255, "step": 1736 }, { "epoch": 0.25, "grad_norm": 6.459397315979004, "learning_rate": 1.9896672794128042e-06, "loss": 0.726, "step": 1737 }, { "epoch": 0.25, "grad_norm": 6.270968914031982, "learning_rate": 1.9896445719891994e-06, "loss": 0.718, "step": 1738 }, { "epoch": 0.25, "grad_norm": 6.670030117034912, "learning_rate": 1.989621839771658e-06, "loss": 0.7062, "step": 1739 }, { "epoch": 0.25, "grad_norm": 6.275521755218506, "learning_rate": 1.9895990827607493e-06, "loss": 0.7158, "step": 1740 }, { "epoch": 0.25, "grad_norm": 6.776490688323975, "learning_rate": 1.9895763009570434e-06, "loss": 0.7092, "step": 1741 }, { "epoch": 0.25, "grad_norm": 6.203810214996338, "learning_rate": 1.9895534943611114e-06, "loss": 0.7512, "step": 1742 }, { "epoch": 0.25, "grad_norm": 6.089656352996826, "learning_rate": 1.9895306629735244e-06, "loss": 0.7218, "step": 1743 }, { "epoch": 0.25, "grad_norm": 6.713515758514404, "learning_rate": 1.9895078067948544e-06, "loss": 0.706, "step": 1744 }, { "epoch": 0.25, "grad_norm": 6.500524520874023, "learning_rate": 1.989484925825674e-06, "loss": 0.7487, "step": 1745 }, { "epoch": 0.25, "grad_norm": 6.310241222381592, "learning_rate": 1.989462020066557e-06, "loss": 0.7948, "step": 1746 }, { "epoch": 0.25, "grad_norm": 6.392171859741211, "learning_rate": 1.989439089518076e-06, "loss": 0.8263, "step": 1747 }, { "epoch": 0.25, "grad_norm": 6.300758361816406, "learning_rate": 1.9894161341808066e-06, "loss": 0.7801, "step": 1748 }, { "epoch": 0.25, "grad_norm": 6.1517415046691895, "learning_rate": 1.989393154055324e-06, "loss": 0.8669, "step": 1749 }, { "epoch": 0.25, "grad_norm": 6.012267112731934, "learning_rate": 1.989370149142203e-06, "loss": 0.7901, "step": 1750 }, { "epoch": 0.25, "grad_norm": 5.351133346557617, "learning_rate": 1.989347119442021e-06, "loss": 0.7626, "step": 1751 }, { "epoch": 0.25, "grad_norm": 6.439187526702881, "learning_rate": 1.989324064955354e-06, "loss": 0.7635, "step": 1752 }, { "epoch": 0.25, "grad_norm": 5.838225364685059, "learning_rate": 1.98930098568278e-06, "loss": 0.7792, "step": 1753 }, { "epoch": 0.25, "grad_norm": 6.4283599853515625, "learning_rate": 1.9892778816248772e-06, "loss": 0.7605, "step": 1754 }, { "epoch": 0.25, "grad_norm": 6.348487377166748, "learning_rate": 1.989254752782225e-06, "loss": 0.7412, "step": 1755 }, { "epoch": 0.25, "grad_norm": 6.715217590332031, "learning_rate": 1.9892315991554016e-06, "loss": 0.7272, "step": 1756 }, { "epoch": 0.25, "grad_norm": 5.974793910980225, "learning_rate": 1.9892084207449888e-06, "loss": 0.8191, "step": 1757 }, { "epoch": 0.26, "grad_norm": 5.666583061218262, "learning_rate": 1.9891852175515654e-06, "loss": 0.7679, "step": 1758 }, { "epoch": 0.26, "grad_norm": 5.878620147705078, "learning_rate": 1.9891619895757143e-06, "loss": 0.7444, "step": 1759 }, { "epoch": 0.26, "grad_norm": 6.229365348815918, "learning_rate": 1.989138736818016e-06, "loss": 0.8939, "step": 1760 }, { "epoch": 0.26, "grad_norm": 5.950687408447266, "learning_rate": 1.9891154592790546e-06, "loss": 0.7498, "step": 1761 }, { "epoch": 0.26, "grad_norm": 6.163280963897705, "learning_rate": 1.9890921569594123e-06, "loss": 0.755, "step": 1762 }, { "epoch": 0.26, "grad_norm": 6.9115824699401855, "learning_rate": 1.989068829859673e-06, "loss": 0.8565, "step": 1763 }, { "epoch": 0.26, "grad_norm": 5.832455158233643, "learning_rate": 1.989045477980421e-06, "loss": 0.7229, "step": 1764 }, { "epoch": 0.26, "grad_norm": 5.402175426483154, "learning_rate": 1.9890221013222418e-06, "loss": 0.6923, "step": 1765 }, { "epoch": 0.26, "grad_norm": 6.291062831878662, "learning_rate": 1.988998699885721e-06, "loss": 0.8095, "step": 1766 }, { "epoch": 0.26, "grad_norm": 6.973996162414551, "learning_rate": 1.9889752736714446e-06, "loss": 0.755, "step": 1767 }, { "epoch": 0.26, "grad_norm": 6.480388164520264, "learning_rate": 1.9889518226799996e-06, "loss": 0.7341, "step": 1768 }, { "epoch": 0.26, "grad_norm": 6.211118221282959, "learning_rate": 1.988928346911974e-06, "loss": 0.7702, "step": 1769 }, { "epoch": 0.26, "grad_norm": 6.492496490478516, "learning_rate": 1.9889048463679546e-06, "loss": 0.779, "step": 1770 }, { "epoch": 0.26, "grad_norm": 6.409772872924805, "learning_rate": 1.9888813210485316e-06, "loss": 0.7395, "step": 1771 }, { "epoch": 0.26, "grad_norm": 6.420814514160156, "learning_rate": 1.988857770954294e-06, "loss": 0.7672, "step": 1772 }, { "epoch": 0.26, "grad_norm": 6.417016983032227, "learning_rate": 1.9888341960858314e-06, "loss": 0.7849, "step": 1773 }, { "epoch": 0.26, "grad_norm": 5.895301342010498, "learning_rate": 1.988810596443735e-06, "loss": 0.7677, "step": 1774 }, { "epoch": 0.26, "grad_norm": 7.152631759643555, "learning_rate": 1.9887869720285955e-06, "loss": 0.7907, "step": 1775 }, { "epoch": 0.26, "grad_norm": 5.8392252922058105, "learning_rate": 1.988763322841005e-06, "loss": 0.7014, "step": 1776 }, { "epoch": 0.26, "grad_norm": 6.760347366333008, "learning_rate": 1.9887396488815563e-06, "loss": 0.7504, "step": 1777 }, { "epoch": 0.26, "grad_norm": 5.895553112030029, "learning_rate": 1.988715950150842e-06, "loss": 0.6964, "step": 1778 }, { "epoch": 0.26, "grad_norm": 6.640448570251465, "learning_rate": 1.988692226649456e-06, "loss": 0.6915, "step": 1779 }, { "epoch": 0.26, "grad_norm": 6.22177791595459, "learning_rate": 1.9886684783779926e-06, "loss": 0.6692, "step": 1780 }, { "epoch": 0.26, "grad_norm": 6.219435214996338, "learning_rate": 1.9886447053370473e-06, "loss": 0.7874, "step": 1781 }, { "epoch": 0.26, "grad_norm": 6.217536926269531, "learning_rate": 1.988620907527215e-06, "loss": 0.7023, "step": 1782 }, { "epoch": 0.26, "grad_norm": 7.265847206115723, "learning_rate": 1.9885970849490927e-06, "loss": 0.6728, "step": 1783 }, { "epoch": 0.26, "grad_norm": 5.90098237991333, "learning_rate": 1.988573237603276e-06, "loss": 0.7765, "step": 1784 }, { "epoch": 0.26, "grad_norm": 6.780352592468262, "learning_rate": 1.9885493654903634e-06, "loss": 0.7155, "step": 1785 }, { "epoch": 0.26, "grad_norm": 6.356246471405029, "learning_rate": 1.9885254686109527e-06, "loss": 0.6704, "step": 1786 }, { "epoch": 0.26, "grad_norm": 6.269070148468018, "learning_rate": 1.988501546965643e-06, "loss": 0.7272, "step": 1787 }, { "epoch": 0.26, "grad_norm": 6.744259357452393, "learning_rate": 1.9884776005550325e-06, "loss": 0.7856, "step": 1788 }, { "epoch": 0.26, "grad_norm": 6.002190589904785, "learning_rate": 1.9884536293797223e-06, "loss": 0.7372, "step": 1789 }, { "epoch": 0.26, "grad_norm": 6.907941818237305, "learning_rate": 1.988429633440312e-06, "loss": 0.8892, "step": 1790 }, { "epoch": 0.26, "grad_norm": 6.187205791473389, "learning_rate": 1.9884056127374035e-06, "loss": 0.7396, "step": 1791 }, { "epoch": 0.26, "grad_norm": 6.443041801452637, "learning_rate": 1.9883815672715986e-06, "loss": 0.7303, "step": 1792 }, { "epoch": 0.26, "grad_norm": 7.069962024688721, "learning_rate": 1.988357497043499e-06, "loss": 0.7299, "step": 1793 }, { "epoch": 0.26, "grad_norm": 6.7532958984375, "learning_rate": 1.988333402053709e-06, "loss": 0.7779, "step": 1794 }, { "epoch": 0.26, "grad_norm": 7.479704856872559, "learning_rate": 1.988309282302831e-06, "loss": 0.7675, "step": 1795 }, { "epoch": 0.26, "grad_norm": 5.684778690338135, "learning_rate": 1.9882851377914696e-06, "loss": 0.7298, "step": 1796 }, { "epoch": 0.26, "grad_norm": 5.881229400634766, "learning_rate": 1.9882609685202302e-06, "loss": 0.7509, "step": 1797 }, { "epoch": 0.26, "grad_norm": 5.737620830535889, "learning_rate": 1.9882367744897177e-06, "loss": 0.7277, "step": 1798 }, { "epoch": 0.26, "grad_norm": 6.279627799987793, "learning_rate": 1.9882125557005384e-06, "loss": 0.685, "step": 1799 }, { "epoch": 0.26, "grad_norm": 6.105114459991455, "learning_rate": 1.9881883121532997e-06, "loss": 0.7223, "step": 1800 }, { "epoch": 0.26, "grad_norm": 6.396207809448242, "learning_rate": 1.988164043848608e-06, "loss": 0.6846, "step": 1801 }, { "epoch": 0.26, "grad_norm": 5.509081840515137, "learning_rate": 1.988139750787072e-06, "loss": 0.7401, "step": 1802 }, { "epoch": 0.26, "grad_norm": 5.948086738586426, "learning_rate": 1.9881154329692998e-06, "loss": 0.8092, "step": 1803 }, { "epoch": 0.26, "grad_norm": 5.561705589294434, "learning_rate": 1.9880910903959017e-06, "loss": 0.733, "step": 1804 }, { "epoch": 0.26, "grad_norm": 6.0244832038879395, "learning_rate": 1.9880667230674862e-06, "loss": 0.7659, "step": 1805 }, { "epoch": 0.26, "grad_norm": 6.597229480743408, "learning_rate": 1.9880423309846647e-06, "loss": 0.7277, "step": 1806 }, { "epoch": 0.26, "grad_norm": 6.025086402893066, "learning_rate": 1.9880179141480476e-06, "loss": 0.6877, "step": 1807 }, { "epoch": 0.26, "grad_norm": 6.581949234008789, "learning_rate": 1.9879934725582475e-06, "loss": 0.7173, "step": 1808 }, { "epoch": 0.26, "grad_norm": 6.086109638214111, "learning_rate": 1.987969006215876e-06, "loss": 0.7406, "step": 1809 }, { "epoch": 0.26, "grad_norm": 5.925724029541016, "learning_rate": 1.9879445151215465e-06, "loss": 0.7461, "step": 1810 }, { "epoch": 0.26, "grad_norm": 7.406732082366943, "learning_rate": 1.987919999275872e-06, "loss": 0.8999, "step": 1811 }, { "epoch": 0.26, "grad_norm": 6.560225486755371, "learning_rate": 1.9878954586794673e-06, "loss": 0.7133, "step": 1812 }, { "epoch": 0.26, "grad_norm": 6.147462844848633, "learning_rate": 1.9878708933329473e-06, "loss": 0.7806, "step": 1813 }, { "epoch": 0.26, "grad_norm": 6.3713154792785645, "learning_rate": 1.9878463032369267e-06, "loss": 0.8422, "step": 1814 }, { "epoch": 0.26, "grad_norm": 6.240959167480469, "learning_rate": 1.9878216883920227e-06, "loss": 0.7286, "step": 1815 }, { "epoch": 0.26, "grad_norm": 6.675124645233154, "learning_rate": 1.987797048798851e-06, "loss": 0.7732, "step": 1816 }, { "epoch": 0.26, "grad_norm": 6.012148857116699, "learning_rate": 1.9877723844580293e-06, "loss": 0.736, "step": 1817 }, { "epoch": 0.26, "grad_norm": 5.893982887268066, "learning_rate": 1.9877476953701754e-06, "loss": 0.7973, "step": 1818 }, { "epoch": 0.26, "grad_norm": 6.487126350402832, "learning_rate": 1.987722981535908e-06, "loss": 0.8072, "step": 1819 }, { "epoch": 0.26, "grad_norm": 7.2993574142456055, "learning_rate": 1.987698242955846e-06, "loss": 0.7246, "step": 1820 }, { "epoch": 0.26, "grad_norm": 6.674409866333008, "learning_rate": 1.9876734796306093e-06, "loss": 0.7679, "step": 1821 }, { "epoch": 0.26, "grad_norm": 6.801671981811523, "learning_rate": 1.9876486915608188e-06, "loss": 0.7686, "step": 1822 }, { "epoch": 0.26, "grad_norm": 6.774173736572266, "learning_rate": 1.9876238787470948e-06, "loss": 0.8299, "step": 1823 }, { "epoch": 0.26, "grad_norm": 6.389785289764404, "learning_rate": 1.987599041190059e-06, "loss": 0.6865, "step": 1824 }, { "epoch": 0.26, "grad_norm": 6.233993053436279, "learning_rate": 1.9875741788903345e-06, "loss": 0.768, "step": 1825 }, { "epoch": 0.26, "grad_norm": 6.290210247039795, "learning_rate": 1.9875492918485434e-06, "loss": 0.7581, "step": 1826 }, { "epoch": 0.27, "grad_norm": 5.7569074630737305, "learning_rate": 1.987524380065309e-06, "loss": 0.7373, "step": 1827 }, { "epoch": 0.27, "grad_norm": 7.2523274421691895, "learning_rate": 1.9874994435412564e-06, "loss": 0.8224, "step": 1828 }, { "epoch": 0.27, "grad_norm": 7.33977746963501, "learning_rate": 1.987474482277009e-06, "loss": 0.7704, "step": 1829 }, { "epoch": 0.27, "grad_norm": 5.6935648918151855, "learning_rate": 1.9874494962731937e-06, "loss": 0.7293, "step": 1830 }, { "epoch": 0.27, "grad_norm": 6.4020771980285645, "learning_rate": 1.9874244855304353e-06, "loss": 0.7684, "step": 1831 }, { "epoch": 0.27, "grad_norm": 6.025520324707031, "learning_rate": 1.987399450049361e-06, "loss": 0.7345, "step": 1832 }, { "epoch": 0.27, "grad_norm": 6.577572822570801, "learning_rate": 1.9873743898305975e-06, "loss": 0.7639, "step": 1833 }, { "epoch": 0.27, "grad_norm": 5.863094329833984, "learning_rate": 1.9873493048747734e-06, "loss": 0.7191, "step": 1834 }, { "epoch": 0.27, "grad_norm": 6.19534158706665, "learning_rate": 1.9873241951825163e-06, "loss": 0.7394, "step": 1835 }, { "epoch": 0.27, "grad_norm": 5.872631072998047, "learning_rate": 1.987299060754456e-06, "loss": 0.6317, "step": 1836 }, { "epoch": 0.27, "grad_norm": 5.937705039978027, "learning_rate": 1.987273901591222e-06, "loss": 0.7436, "step": 1837 }, { "epoch": 0.27, "grad_norm": 6.889361381530762, "learning_rate": 1.987248717693444e-06, "loss": 0.7804, "step": 1838 }, { "epoch": 0.27, "grad_norm": 6.687798976898193, "learning_rate": 1.9872235090617545e-06, "loss": 0.7843, "step": 1839 }, { "epoch": 0.27, "grad_norm": 6.467319011688232, "learning_rate": 1.9871982756967833e-06, "loss": 0.687, "step": 1840 }, { "epoch": 0.27, "grad_norm": 6.1728386878967285, "learning_rate": 1.9871730175991636e-06, "loss": 0.7429, "step": 1841 }, { "epoch": 0.27, "grad_norm": 6.431681156158447, "learning_rate": 1.987147734769528e-06, "loss": 0.7832, "step": 1842 }, { "epoch": 0.27, "grad_norm": 6.188238620758057, "learning_rate": 1.9871224272085096e-06, "loss": 0.6942, "step": 1843 }, { "epoch": 0.27, "grad_norm": 6.634006500244141, "learning_rate": 1.9870970949167427e-06, "loss": 0.6896, "step": 1844 }, { "epoch": 0.27, "grad_norm": 7.056770324707031, "learning_rate": 1.987071737894862e-06, "loss": 0.8776, "step": 1845 }, { "epoch": 0.27, "grad_norm": 6.43453311920166, "learning_rate": 1.987046356143503e-06, "loss": 0.7114, "step": 1846 }, { "epoch": 0.27, "grad_norm": 6.69672155380249, "learning_rate": 1.9870209496633008e-06, "loss": 0.8611, "step": 1847 }, { "epoch": 0.27, "grad_norm": 6.406817436218262, "learning_rate": 1.986995518454893e-06, "loss": 0.7879, "step": 1848 }, { "epoch": 0.27, "grad_norm": 5.859289646148682, "learning_rate": 1.9869700625189155e-06, "loss": 0.7829, "step": 1849 }, { "epoch": 0.27, "grad_norm": 6.300845623016357, "learning_rate": 1.9869445818560075e-06, "loss": 0.8197, "step": 1850 }, { "epoch": 0.27, "grad_norm": 6.782286643981934, "learning_rate": 1.986919076466806e-06, "loss": 0.7836, "step": 1851 }, { "epoch": 0.27, "grad_norm": 6.473018646240234, "learning_rate": 1.9868935463519507e-06, "loss": 0.7956, "step": 1852 }, { "epoch": 0.27, "grad_norm": 7.576248645782471, "learning_rate": 1.9868679915120814e-06, "loss": 0.85, "step": 1853 }, { "epoch": 0.27, "grad_norm": 5.742010593414307, "learning_rate": 1.9868424119478383e-06, "loss": 0.7614, "step": 1854 }, { "epoch": 0.27, "grad_norm": 6.268867492675781, "learning_rate": 1.986816807659862e-06, "loss": 0.7875, "step": 1855 }, { "epoch": 0.27, "grad_norm": 6.426595211029053, "learning_rate": 1.986791178648793e-06, "loss": 0.7182, "step": 1856 }, { "epoch": 0.27, "grad_norm": 6.611080646514893, "learning_rate": 1.986765524915275e-06, "loss": 0.7594, "step": 1857 }, { "epoch": 0.27, "grad_norm": 7.0249924659729, "learning_rate": 1.9867398464599506e-06, "loss": 0.8045, "step": 1858 }, { "epoch": 0.27, "grad_norm": 6.926028728485107, "learning_rate": 1.9867141432834622e-06, "loss": 0.7929, "step": 1859 }, { "epoch": 0.27, "grad_norm": 7.590976715087891, "learning_rate": 1.986688415386454e-06, "loss": 0.8587, "step": 1860 }, { "epoch": 0.27, "grad_norm": 5.903036117553711, "learning_rate": 1.986662662769571e-06, "loss": 0.7229, "step": 1861 }, { "epoch": 0.27, "grad_norm": 6.129387378692627, "learning_rate": 1.986636885433458e-06, "loss": 0.8113, "step": 1862 }, { "epoch": 0.27, "grad_norm": 6.627843856811523, "learning_rate": 1.986611083378761e-06, "loss": 0.743, "step": 1863 }, { "epoch": 0.27, "grad_norm": 6.401311874389648, "learning_rate": 1.9865852566061267e-06, "loss": 0.6889, "step": 1864 }, { "epoch": 0.27, "grad_norm": 5.79487943649292, "learning_rate": 1.9865594051162017e-06, "loss": 0.6908, "step": 1865 }, { "epoch": 0.27, "grad_norm": 6.186149597167969, "learning_rate": 1.9865335289096334e-06, "loss": 0.7746, "step": 1866 }, { "epoch": 0.27, "grad_norm": 6.284590244293213, "learning_rate": 1.9865076279870706e-06, "loss": 0.7286, "step": 1867 }, { "epoch": 0.27, "grad_norm": 6.11834716796875, "learning_rate": 1.986481702349162e-06, "loss": 0.7414, "step": 1868 }, { "epoch": 0.27, "grad_norm": 6.2470526695251465, "learning_rate": 1.9864557519965576e-06, "loss": 0.7446, "step": 1869 }, { "epoch": 0.27, "grad_norm": 5.946624279022217, "learning_rate": 1.9864297769299072e-06, "loss": 0.7739, "step": 1870 }, { "epoch": 0.27, "grad_norm": 6.209799289703369, "learning_rate": 1.9864037771498615e-06, "loss": 0.8209, "step": 1871 }, { "epoch": 0.27, "grad_norm": 6.821459770202637, "learning_rate": 1.9863777526570713e-06, "loss": 0.7485, "step": 1872 }, { "epoch": 0.27, "grad_norm": 6.5118794441223145, "learning_rate": 1.98635170345219e-06, "loss": 0.7433, "step": 1873 }, { "epoch": 0.27, "grad_norm": 7.131951808929443, "learning_rate": 1.9863256295358687e-06, "loss": 0.829, "step": 1874 }, { "epoch": 0.27, "grad_norm": 6.112889289855957, "learning_rate": 1.986299530908762e-06, "loss": 0.7206, "step": 1875 }, { "epoch": 0.27, "grad_norm": 5.666181564331055, "learning_rate": 1.9862734075715225e-06, "loss": 0.6893, "step": 1876 }, { "epoch": 0.27, "grad_norm": 6.561570644378662, "learning_rate": 1.9862472595248057e-06, "loss": 0.8197, "step": 1877 }, { "epoch": 0.27, "grad_norm": 5.56663179397583, "learning_rate": 1.986221086769266e-06, "loss": 0.7369, "step": 1878 }, { "epoch": 0.27, "grad_norm": 6.395836353302002, "learning_rate": 1.98619488930556e-06, "loss": 0.722, "step": 1879 }, { "epoch": 0.27, "grad_norm": 6.907220840454102, "learning_rate": 1.986168667134343e-06, "loss": 0.8161, "step": 1880 }, { "epoch": 0.27, "grad_norm": 6.806344985961914, "learning_rate": 1.9861424202562724e-06, "loss": 0.7057, "step": 1881 }, { "epoch": 0.27, "grad_norm": 6.614920616149902, "learning_rate": 1.986116148672006e-06, "loss": 0.8636, "step": 1882 }, { "epoch": 0.27, "grad_norm": 5.971229553222656, "learning_rate": 1.9860898523822016e-06, "loss": 0.8153, "step": 1883 }, { "epoch": 0.27, "grad_norm": 6.151391506195068, "learning_rate": 1.986063531387518e-06, "loss": 0.7592, "step": 1884 }, { "epoch": 0.27, "grad_norm": 6.967766761779785, "learning_rate": 1.986037185688615e-06, "loss": 0.7315, "step": 1885 }, { "epoch": 0.27, "grad_norm": 6.186967372894287, "learning_rate": 1.9860108152861524e-06, "loss": 0.7095, "step": 1886 }, { "epoch": 0.27, "grad_norm": 5.871858596801758, "learning_rate": 1.985984420180791e-06, "loss": 0.7253, "step": 1887 }, { "epoch": 0.27, "grad_norm": 5.911019325256348, "learning_rate": 1.985958000373192e-06, "loss": 0.7127, "step": 1888 }, { "epoch": 0.27, "grad_norm": 5.431756019592285, "learning_rate": 1.9859315558640174e-06, "loss": 0.7076, "step": 1889 }, { "epoch": 0.27, "grad_norm": 5.858226299285889, "learning_rate": 1.9859050866539297e-06, "loss": 0.6613, "step": 1890 }, { "epoch": 0.27, "grad_norm": 6.8844499588012695, "learning_rate": 1.9858785927435913e-06, "loss": 0.7223, "step": 1891 }, { "epoch": 0.27, "grad_norm": 6.8946332931518555, "learning_rate": 1.9858520741336676e-06, "loss": 0.7777, "step": 1892 }, { "epoch": 0.27, "grad_norm": 7.8543925285339355, "learning_rate": 1.9858255308248214e-06, "loss": 0.7354, "step": 1893 }, { "epoch": 0.27, "grad_norm": 6.774752140045166, "learning_rate": 1.9857989628177184e-06, "loss": 0.8412, "step": 1894 }, { "epoch": 0.27, "grad_norm": 6.476202487945557, "learning_rate": 1.9857723701130243e-06, "loss": 0.7892, "step": 1895 }, { "epoch": 0.28, "grad_norm": 5.82212495803833, "learning_rate": 1.985745752711405e-06, "loss": 0.8099, "step": 1896 }, { "epoch": 0.28, "grad_norm": 6.236591339111328, "learning_rate": 1.9857191106135275e-06, "loss": 0.7577, "step": 1897 }, { "epoch": 0.28, "grad_norm": 6.519994258880615, "learning_rate": 1.9856924438200597e-06, "loss": 0.719, "step": 1898 }, { "epoch": 0.28, "grad_norm": 6.239960193634033, "learning_rate": 1.985665752331669e-06, "loss": 0.8214, "step": 1899 }, { "epoch": 0.28, "grad_norm": 6.3124566078186035, "learning_rate": 1.9856390361490247e-06, "loss": 0.7239, "step": 1900 }, { "epoch": 0.28, "grad_norm": 6.647130966186523, "learning_rate": 1.9856122952727953e-06, "loss": 0.768, "step": 1901 }, { "epoch": 0.28, "grad_norm": 6.364936828613281, "learning_rate": 1.985585529703652e-06, "loss": 0.7124, "step": 1902 }, { "epoch": 0.28, "grad_norm": 6.719803810119629, "learning_rate": 1.985558739442264e-06, "loss": 0.7813, "step": 1903 }, { "epoch": 0.28, "grad_norm": 6.801145076751709, "learning_rate": 1.9855319244893035e-06, "loss": 0.681, "step": 1904 }, { "epoch": 0.28, "grad_norm": 6.766202926635742, "learning_rate": 1.985505084845442e-06, "loss": 0.7802, "step": 1905 }, { "epoch": 0.28, "grad_norm": 6.689481735229492, "learning_rate": 1.9854782205113513e-06, "loss": 0.8087, "step": 1906 }, { "epoch": 0.28, "grad_norm": 8.05010986328125, "learning_rate": 1.9854513314877055e-06, "loss": 0.7375, "step": 1907 }, { "epoch": 0.28, "grad_norm": 6.733436107635498, "learning_rate": 1.985424417775178e-06, "loss": 0.7742, "step": 1908 }, { "epoch": 0.28, "grad_norm": 6.18013334274292, "learning_rate": 1.9853974793744427e-06, "loss": 0.7657, "step": 1909 }, { "epoch": 0.28, "grad_norm": 6.694931983947754, "learning_rate": 1.9853705162861746e-06, "loss": 0.7125, "step": 1910 }, { "epoch": 0.28, "grad_norm": 6.349062442779541, "learning_rate": 1.985343528511049e-06, "loss": 0.8105, "step": 1911 }, { "epoch": 0.28, "grad_norm": 6.115397930145264, "learning_rate": 1.9853165160497425e-06, "loss": 0.732, "step": 1912 }, { "epoch": 0.28, "grad_norm": 6.991704940795898, "learning_rate": 1.9852894789029316e-06, "loss": 0.8652, "step": 1913 }, { "epoch": 0.28, "grad_norm": 6.127795696258545, "learning_rate": 1.985262417071294e-06, "loss": 0.7121, "step": 1914 }, { "epoch": 0.28, "grad_norm": 6.738133907318115, "learning_rate": 1.985235330555507e-06, "loss": 0.8193, "step": 1915 }, { "epoch": 0.28, "grad_norm": 6.941371917724609, "learning_rate": 1.98520821935625e-06, "loss": 0.7308, "step": 1916 }, { "epoch": 0.28, "grad_norm": 5.696821212768555, "learning_rate": 1.985181083474202e-06, "loss": 0.7197, "step": 1917 }, { "epoch": 0.28, "grad_norm": 5.760307788848877, "learning_rate": 1.9851539229100425e-06, "loss": 0.6952, "step": 1918 }, { "epoch": 0.28, "grad_norm": 6.083909034729004, "learning_rate": 1.985126737664452e-06, "loss": 0.7505, "step": 1919 }, { "epoch": 0.28, "grad_norm": 5.803302764892578, "learning_rate": 1.985099527738112e-06, "loss": 0.7498, "step": 1920 }, { "epoch": 0.28, "grad_norm": 6.653357982635498, "learning_rate": 1.985072293131704e-06, "loss": 0.7796, "step": 1921 }, { "epoch": 0.28, "grad_norm": 6.382076263427734, "learning_rate": 1.98504503384591e-06, "loss": 0.7378, "step": 1922 }, { "epoch": 0.28, "grad_norm": 7.573156833648682, "learning_rate": 1.985017749881413e-06, "loss": 0.7764, "step": 1923 }, { "epoch": 0.28, "grad_norm": 6.425681114196777, "learning_rate": 1.984990441238897e-06, "loss": 0.6803, "step": 1924 }, { "epoch": 0.28, "grad_norm": 5.952460289001465, "learning_rate": 1.984963107919046e-06, "loss": 0.7558, "step": 1925 }, { "epoch": 0.28, "grad_norm": 6.515596866607666, "learning_rate": 1.984935749922545e-06, "loss": 0.7023, "step": 1926 }, { "epoch": 0.28, "grad_norm": 6.037102222442627, "learning_rate": 1.9849083672500787e-06, "loss": 0.7126, "step": 1927 }, { "epoch": 0.28, "grad_norm": 5.796474456787109, "learning_rate": 1.984880959902334e-06, "loss": 0.651, "step": 1928 }, { "epoch": 0.28, "grad_norm": 5.81840705871582, "learning_rate": 1.984853527879997e-06, "loss": 0.7289, "step": 1929 }, { "epoch": 0.28, "grad_norm": 6.165190696716309, "learning_rate": 1.9848260711837548e-06, "loss": 0.7868, "step": 1930 }, { "epoch": 0.28, "grad_norm": 6.6512041091918945, "learning_rate": 1.9847985898142954e-06, "loss": 0.6756, "step": 1931 }, { "epoch": 0.28, "grad_norm": 6.543101787567139, "learning_rate": 1.984771083772308e-06, "loss": 0.7901, "step": 1932 }, { "epoch": 0.28, "grad_norm": 5.666467666625977, "learning_rate": 1.984743553058481e-06, "loss": 0.7311, "step": 1933 }, { "epoch": 0.28, "grad_norm": 6.436503887176514, "learning_rate": 1.984715997673504e-06, "loss": 0.7715, "step": 1934 }, { "epoch": 0.28, "grad_norm": 5.769561290740967, "learning_rate": 1.9846884176180677e-06, "loss": 0.7285, "step": 1935 }, { "epoch": 0.28, "grad_norm": 6.995087146759033, "learning_rate": 1.9846608128928633e-06, "loss": 0.7664, "step": 1936 }, { "epoch": 0.28, "grad_norm": 6.51812219619751, "learning_rate": 1.9846331834985824e-06, "loss": 0.9072, "step": 1937 }, { "epoch": 0.28, "grad_norm": 6.173680782318115, "learning_rate": 1.9846055294359167e-06, "loss": 0.7008, "step": 1938 }, { "epoch": 0.28, "grad_norm": 5.6692280769348145, "learning_rate": 1.984577850705559e-06, "loss": 0.7436, "step": 1939 }, { "epoch": 0.28, "grad_norm": 6.047989845275879, "learning_rate": 1.9845501473082032e-06, "loss": 0.7935, "step": 1940 }, { "epoch": 0.28, "grad_norm": 6.576366901397705, "learning_rate": 1.9845224192445434e-06, "loss": 0.8469, "step": 1941 }, { "epoch": 0.28, "grad_norm": 6.012335300445557, "learning_rate": 1.984494666515274e-06, "loss": 0.7369, "step": 1942 }, { "epoch": 0.28, "grad_norm": 5.958917140960693, "learning_rate": 1.9844668891210904e-06, "loss": 0.7175, "step": 1943 }, { "epoch": 0.28, "grad_norm": 6.924589157104492, "learning_rate": 1.984439087062688e-06, "loss": 0.7711, "step": 1944 }, { "epoch": 0.28, "grad_norm": 6.437355041503906, "learning_rate": 1.9844112603407645e-06, "loss": 0.8209, "step": 1945 }, { "epoch": 0.28, "grad_norm": 6.06626033782959, "learning_rate": 1.984383408956016e-06, "loss": 0.6864, "step": 1946 }, { "epoch": 0.28, "grad_norm": 5.818572044372559, "learning_rate": 1.9843555329091404e-06, "loss": 0.6844, "step": 1947 }, { "epoch": 0.28, "grad_norm": 6.259227275848389, "learning_rate": 1.9843276322008367e-06, "loss": 0.7379, "step": 1948 }, { "epoch": 0.28, "grad_norm": 6.0668745040893555, "learning_rate": 1.9842997068318035e-06, "loss": 0.761, "step": 1949 }, { "epoch": 0.28, "grad_norm": 6.440159797668457, "learning_rate": 1.9842717568027406e-06, "loss": 0.7546, "step": 1950 }, { "epoch": 0.28, "grad_norm": 6.075245380401611, "learning_rate": 1.9842437821143475e-06, "loss": 0.7645, "step": 1951 }, { "epoch": 0.28, "grad_norm": 6.239591121673584, "learning_rate": 1.984215782767326e-06, "loss": 0.7684, "step": 1952 }, { "epoch": 0.28, "grad_norm": 6.314852237701416, "learning_rate": 1.9841877587623778e-06, "loss": 0.8683, "step": 1953 }, { "epoch": 0.28, "grad_norm": 6.616854190826416, "learning_rate": 1.9841597101002037e-06, "loss": 0.7481, "step": 1954 }, { "epoch": 0.28, "grad_norm": 6.775698184967041, "learning_rate": 1.9841316367815075e-06, "loss": 0.7245, "step": 1955 }, { "epoch": 0.28, "grad_norm": 6.196733474731445, "learning_rate": 1.984103538806992e-06, "loss": 0.7895, "step": 1956 }, { "epoch": 0.28, "grad_norm": 7.40113639831543, "learning_rate": 1.9840754161773615e-06, "loss": 0.8328, "step": 1957 }, { "epoch": 0.28, "grad_norm": 6.568786144256592, "learning_rate": 1.9840472688933207e-06, "loss": 0.7162, "step": 1958 }, { "epoch": 0.28, "grad_norm": 6.760060787200928, "learning_rate": 1.984019096955574e-06, "loss": 0.7979, "step": 1959 }, { "epoch": 0.28, "grad_norm": 7.48708438873291, "learning_rate": 1.9839909003648277e-06, "loss": 0.6789, "step": 1960 }, { "epoch": 0.28, "grad_norm": 6.645330429077148, "learning_rate": 1.9839626791217883e-06, "loss": 0.799, "step": 1961 }, { "epoch": 0.28, "grad_norm": 5.821784496307373, "learning_rate": 1.983934433227163e-06, "loss": 0.7502, "step": 1962 }, { "epoch": 0.28, "grad_norm": 6.225460052490234, "learning_rate": 1.983906162681659e-06, "loss": 0.7125, "step": 1963 }, { "epoch": 0.28, "grad_norm": 5.825312614440918, "learning_rate": 1.9838778674859848e-06, "loss": 0.7044, "step": 1964 }, { "epoch": 0.29, "grad_norm": 5.966251373291016, "learning_rate": 1.9838495476408493e-06, "loss": 0.7398, "step": 1965 }, { "epoch": 0.29, "grad_norm": 7.557414531707764, "learning_rate": 1.983821203146962e-06, "loss": 0.7061, "step": 1966 }, { "epoch": 0.29, "grad_norm": 6.262636661529541, "learning_rate": 1.9837928340050327e-06, "loss": 0.7306, "step": 1967 }, { "epoch": 0.29, "grad_norm": 6.596365451812744, "learning_rate": 1.9837644402157725e-06, "loss": 0.8239, "step": 1968 }, { "epoch": 0.29, "grad_norm": 5.709924221038818, "learning_rate": 1.9837360217798927e-06, "loss": 0.7939, "step": 1969 }, { "epoch": 0.29, "grad_norm": 6.646899223327637, "learning_rate": 1.983707578698106e-06, "loss": 0.7251, "step": 1970 }, { "epoch": 0.29, "grad_norm": 6.532498836517334, "learning_rate": 1.9836791109711233e-06, "loss": 0.7878, "step": 1971 }, { "epoch": 0.29, "grad_norm": 7.752072811126709, "learning_rate": 1.9836506185996595e-06, "loss": 0.8431, "step": 1972 }, { "epoch": 0.29, "grad_norm": 6.147712230682373, "learning_rate": 1.983622101584427e-06, "loss": 0.7321, "step": 1973 }, { "epoch": 0.29, "grad_norm": 6.861565113067627, "learning_rate": 1.983593559926141e-06, "loss": 0.7318, "step": 1974 }, { "epoch": 0.29, "grad_norm": 5.822984218597412, "learning_rate": 1.9835649936255172e-06, "loss": 0.7317, "step": 1975 }, { "epoch": 0.29, "grad_norm": 7.190425395965576, "learning_rate": 1.9835364026832704e-06, "loss": 0.8003, "step": 1976 }, { "epoch": 0.29, "grad_norm": 6.037756443023682, "learning_rate": 1.983507787100117e-06, "loss": 0.7112, "step": 1977 }, { "epoch": 0.29, "grad_norm": 7.175300598144531, "learning_rate": 1.983479146876774e-06, "loss": 0.8346, "step": 1978 }, { "epoch": 0.29, "grad_norm": 6.816567897796631, "learning_rate": 1.983450482013959e-06, "loss": 0.7939, "step": 1979 }, { "epoch": 0.29, "grad_norm": 5.715997219085693, "learning_rate": 1.9834217925123903e-06, "loss": 0.7803, "step": 1980 }, { "epoch": 0.29, "grad_norm": 6.862188339233398, "learning_rate": 1.983393078372786e-06, "loss": 0.7612, "step": 1981 }, { "epoch": 0.29, "grad_norm": 6.411531448364258, "learning_rate": 1.9833643395958664e-06, "loss": 0.8693, "step": 1982 }, { "epoch": 0.29, "grad_norm": 5.963018417358398, "learning_rate": 1.983335576182351e-06, "loss": 0.7589, "step": 1983 }, { "epoch": 0.29, "grad_norm": 6.577706813812256, "learning_rate": 1.9833067881329602e-06, "loss": 0.7767, "step": 1984 }, { "epoch": 0.29, "grad_norm": 6.07047700881958, "learning_rate": 1.983277975448416e-06, "loss": 0.7413, "step": 1985 }, { "epoch": 0.29, "grad_norm": 6.31978178024292, "learning_rate": 1.983249138129439e-06, "loss": 0.786, "step": 1986 }, { "epoch": 0.29, "grad_norm": 6.063157558441162, "learning_rate": 1.983220276176753e-06, "loss": 0.7563, "step": 1987 }, { "epoch": 0.29, "grad_norm": 6.238227844238281, "learning_rate": 1.9831913895910807e-06, "loss": 0.7121, "step": 1988 }, { "epoch": 0.29, "grad_norm": 5.931545734405518, "learning_rate": 1.9831624783731455e-06, "loss": 0.7931, "step": 1989 }, { "epoch": 0.29, "grad_norm": 5.965508460998535, "learning_rate": 1.983133542523672e-06, "loss": 0.6707, "step": 1990 }, { "epoch": 0.29, "grad_norm": 6.158000946044922, "learning_rate": 1.983104582043385e-06, "loss": 0.7763, "step": 1991 }, { "epoch": 0.29, "grad_norm": 5.924190998077393, "learning_rate": 1.98307559693301e-06, "loss": 0.7516, "step": 1992 }, { "epoch": 0.29, "grad_norm": 5.92521333694458, "learning_rate": 1.983046587193273e-06, "loss": 0.8207, "step": 1993 }, { "epoch": 0.29, "grad_norm": 6.099073886871338, "learning_rate": 1.9830175528249013e-06, "loss": 0.7662, "step": 1994 }, { "epoch": 0.29, "grad_norm": 5.811199188232422, "learning_rate": 1.982988493828622e-06, "loss": 0.7251, "step": 1995 }, { "epoch": 0.29, "grad_norm": 6.741960525512695, "learning_rate": 1.982959410205163e-06, "loss": 0.8001, "step": 1996 }, { "epoch": 0.29, "grad_norm": 6.050787448883057, "learning_rate": 1.982930301955253e-06, "loss": 0.7648, "step": 1997 }, { "epoch": 0.29, "grad_norm": 5.619805335998535, "learning_rate": 1.982901169079622e-06, "loss": 0.7713, "step": 1998 }, { "epoch": 0.29, "grad_norm": 5.3513970375061035, "learning_rate": 1.9828720115789986e-06, "loss": 0.7, "step": 1999 }, { "epoch": 0.29, "grad_norm": 6.002900123596191, "learning_rate": 1.9828428294541144e-06, "loss": 0.7273, "step": 2000 }, { "epoch": 0.29, "grad_norm": 6.99629020690918, "learning_rate": 1.9828136227057e-06, "loss": 0.8489, "step": 2001 }, { "epoch": 0.29, "grad_norm": 5.523172378540039, "learning_rate": 1.982784391334487e-06, "loss": 0.7235, "step": 2002 }, { "epoch": 0.29, "grad_norm": 6.265200614929199, "learning_rate": 1.982755135341208e-06, "loss": 0.7622, "step": 2003 }, { "epoch": 0.29, "grad_norm": 6.958442687988281, "learning_rate": 1.982725854726596e-06, "loss": 0.7987, "step": 2004 }, { "epoch": 0.29, "grad_norm": 6.4711151123046875, "learning_rate": 1.9826965494913843e-06, "loss": 0.6911, "step": 2005 }, { "epoch": 0.29, "grad_norm": 6.34489107131958, "learning_rate": 1.9826672196363075e-06, "loss": 0.8194, "step": 2006 }, { "epoch": 0.29, "grad_norm": 6.401500225067139, "learning_rate": 1.9826378651621e-06, "loss": 0.791, "step": 2007 }, { "epoch": 0.29, "grad_norm": 6.113251686096191, "learning_rate": 1.9826084860694973e-06, "loss": 0.7974, "step": 2008 }, { "epoch": 0.29, "grad_norm": 5.423037052154541, "learning_rate": 1.9825790823592356e-06, "loss": 0.6837, "step": 2009 }, { "epoch": 0.29, "grad_norm": 5.902263641357422, "learning_rate": 1.9825496540320517e-06, "loss": 0.743, "step": 2010 }, { "epoch": 0.29, "grad_norm": 6.9440388679504395, "learning_rate": 1.9825202010886826e-06, "loss": 0.8003, "step": 2011 }, { "epoch": 0.29, "grad_norm": 6.411190986633301, "learning_rate": 1.9824907235298665e-06, "loss": 0.766, "step": 2012 }, { "epoch": 0.29, "grad_norm": 6.97528076171875, "learning_rate": 1.9824612213563416e-06, "loss": 0.7876, "step": 2013 }, { "epoch": 0.29, "grad_norm": 6.466606140136719, "learning_rate": 1.982431694568847e-06, "loss": 0.7703, "step": 2014 }, { "epoch": 0.29, "grad_norm": 6.643104553222656, "learning_rate": 1.9824021431681226e-06, "loss": 0.753, "step": 2015 }, { "epoch": 0.29, "grad_norm": 5.801845073699951, "learning_rate": 1.9823725671549083e-06, "loss": 0.7007, "step": 2016 }, { "epoch": 0.29, "grad_norm": 6.604569911956787, "learning_rate": 1.9823429665299464e-06, "loss": 0.871, "step": 2017 }, { "epoch": 0.29, "grad_norm": 6.476308822631836, "learning_rate": 1.982313341293977e-06, "loss": 0.7086, "step": 2018 }, { "epoch": 0.29, "grad_norm": 6.256526470184326, "learning_rate": 1.982283691447743e-06, "loss": 0.75, "step": 2019 }, { "epoch": 0.29, "grad_norm": 7.134156703948975, "learning_rate": 1.9822540169919875e-06, "loss": 0.7512, "step": 2020 }, { "epoch": 0.29, "grad_norm": 6.673013210296631, "learning_rate": 1.982224317927453e-06, "loss": 0.7791, "step": 2021 }, { "epoch": 0.29, "grad_norm": 5.752661228179932, "learning_rate": 1.982194594254885e-06, "loss": 0.8031, "step": 2022 }, { "epoch": 0.29, "grad_norm": 5.87959098815918, "learning_rate": 1.9821648459750265e-06, "loss": 0.7557, "step": 2023 }, { "epoch": 0.29, "grad_norm": 5.568575859069824, "learning_rate": 1.982135073088624e-06, "loss": 0.7216, "step": 2024 }, { "epoch": 0.29, "grad_norm": 6.299580097198486, "learning_rate": 1.9821052755964232e-06, "loss": 0.6828, "step": 2025 }, { "epoch": 0.29, "grad_norm": 6.1849751472473145, "learning_rate": 1.9820754534991703e-06, "loss": 0.7334, "step": 2026 }, { "epoch": 0.29, "grad_norm": 6.398054122924805, "learning_rate": 1.9820456067976127e-06, "loss": 0.7135, "step": 2027 }, { "epoch": 0.29, "grad_norm": 6.825262546539307, "learning_rate": 1.982015735492498e-06, "loss": 0.8559, "step": 2028 }, { "epoch": 0.29, "grad_norm": 6.629091739654541, "learning_rate": 1.9819858395845745e-06, "loss": 0.7872, "step": 2029 }, { "epoch": 0.29, "grad_norm": 6.357708930969238, "learning_rate": 1.9819559190745915e-06, "loss": 0.7506, "step": 2030 }, { "epoch": 0.29, "grad_norm": 5.95387601852417, "learning_rate": 1.9819259739632985e-06, "loss": 0.7895, "step": 2031 }, { "epoch": 0.29, "grad_norm": 5.841797828674316, "learning_rate": 1.9818960042514455e-06, "loss": 0.7351, "step": 2032 }, { "epoch": 0.29, "grad_norm": 6.875962257385254, "learning_rate": 1.9818660099397838e-06, "loss": 0.8062, "step": 2033 }, { "epoch": 0.3, "grad_norm": 6.52692174911499, "learning_rate": 1.9818359910290643e-06, "loss": 0.843, "step": 2034 }, { "epoch": 0.3, "grad_norm": 6.445919513702393, "learning_rate": 1.9818059475200396e-06, "loss": 0.8604, "step": 2035 }, { "epoch": 0.3, "grad_norm": 5.58190393447876, "learning_rate": 1.9817758794134623e-06, "loss": 0.7527, "step": 2036 }, { "epoch": 0.3, "grad_norm": 5.936411380767822, "learning_rate": 1.981745786710085e-06, "loss": 0.7914, "step": 2037 }, { "epoch": 0.3, "grad_norm": 5.673824310302734, "learning_rate": 1.9817156694106627e-06, "loss": 0.7348, "step": 2038 }, { "epoch": 0.3, "grad_norm": 5.94972038269043, "learning_rate": 1.981685527515949e-06, "loss": 0.7019, "step": 2039 }, { "epoch": 0.3, "grad_norm": 5.909906387329102, "learning_rate": 1.9816553610267e-06, "loss": 0.7961, "step": 2040 }, { "epoch": 0.3, "grad_norm": 6.143725872039795, "learning_rate": 1.9816251699436704e-06, "loss": 0.8391, "step": 2041 }, { "epoch": 0.3, "grad_norm": 6.00513219833374, "learning_rate": 1.9815949542676174e-06, "loss": 0.7225, "step": 2042 }, { "epoch": 0.3, "grad_norm": 7.331344127655029, "learning_rate": 1.9815647139992977e-06, "loss": 0.7236, "step": 2043 }, { "epoch": 0.3, "grad_norm": 6.409919261932373, "learning_rate": 1.9815344491394694e-06, "loss": 0.6381, "step": 2044 }, { "epoch": 0.3, "grad_norm": 6.884680271148682, "learning_rate": 1.9815041596888897e-06, "loss": 0.7563, "step": 2045 }, { "epoch": 0.3, "grad_norm": 5.904130458831787, "learning_rate": 1.981473845648318e-06, "loss": 0.6763, "step": 2046 }, { "epoch": 0.3, "grad_norm": 6.6626505851745605, "learning_rate": 1.9814435070185146e-06, "loss": 0.83, "step": 2047 }, { "epoch": 0.3, "grad_norm": 5.800220489501953, "learning_rate": 1.9814131438002383e-06, "loss": 0.7915, "step": 2048 }, { "epoch": 0.3, "grad_norm": 5.643072605133057, "learning_rate": 1.9813827559942503e-06, "loss": 0.7037, "step": 2049 }, { "epoch": 0.3, "grad_norm": 5.985023021697998, "learning_rate": 1.981352343601312e-06, "loss": 0.7385, "step": 2050 }, { "epoch": 0.3, "grad_norm": 6.26370096206665, "learning_rate": 1.981321906622185e-06, "loss": 0.727, "step": 2051 }, { "epoch": 0.3, "grad_norm": 7.0868659019470215, "learning_rate": 1.9812914450576323e-06, "loss": 0.76, "step": 2052 }, { "epoch": 0.3, "grad_norm": 6.044919967651367, "learning_rate": 1.981260958908417e-06, "loss": 0.7934, "step": 2053 }, { "epoch": 0.3, "grad_norm": 6.138591289520264, "learning_rate": 1.9812304481753025e-06, "loss": 0.8225, "step": 2054 }, { "epoch": 0.3, "grad_norm": 7.0015363693237305, "learning_rate": 1.981199912859054e-06, "loss": 0.8023, "step": 2055 }, { "epoch": 0.3, "grad_norm": 5.659079074859619, "learning_rate": 1.9811693529604354e-06, "loss": 0.7254, "step": 2056 }, { "epoch": 0.3, "grad_norm": 6.986546993255615, "learning_rate": 1.981138768480213e-06, "loss": 0.7048, "step": 2057 }, { "epoch": 0.3, "grad_norm": 6.129934310913086, "learning_rate": 1.981108159419153e-06, "loss": 0.7252, "step": 2058 }, { "epoch": 0.3, "grad_norm": 6.629392147064209, "learning_rate": 1.9810775257780223e-06, "loss": 0.8114, "step": 2059 }, { "epoch": 0.3, "grad_norm": 6.6053924560546875, "learning_rate": 1.981046867557588e-06, "loss": 0.8035, "step": 2060 }, { "epoch": 0.3, "grad_norm": 6.095557689666748, "learning_rate": 1.981016184758619e-06, "loss": 0.743, "step": 2061 }, { "epoch": 0.3, "grad_norm": 6.534306526184082, "learning_rate": 1.980985477381883e-06, "loss": 0.7469, "step": 2062 }, { "epoch": 0.3, "grad_norm": 6.560276508331299, "learning_rate": 1.9809547454281496e-06, "loss": 0.7017, "step": 2063 }, { "epoch": 0.3, "grad_norm": 6.5059733390808105, "learning_rate": 1.9809239888981892e-06, "loss": 0.7097, "step": 2064 }, { "epoch": 0.3, "grad_norm": 6.475485324859619, "learning_rate": 1.9808932077927724e-06, "loss": 0.7554, "step": 2065 }, { "epoch": 0.3, "grad_norm": 6.756776809692383, "learning_rate": 1.98086240211267e-06, "loss": 0.708, "step": 2066 }, { "epoch": 0.3, "grad_norm": 6.5679216384887695, "learning_rate": 1.9808315718586534e-06, "loss": 0.7955, "step": 2067 }, { "epoch": 0.3, "grad_norm": 6.2074689865112305, "learning_rate": 1.9808007170314958e-06, "loss": 0.755, "step": 2068 }, { "epoch": 0.3, "grad_norm": 6.819239616394043, "learning_rate": 1.9807698376319695e-06, "loss": 0.7134, "step": 2069 }, { "epoch": 0.3, "grad_norm": 6.824391841888428, "learning_rate": 1.9807389336608488e-06, "loss": 0.7035, "step": 2070 }, { "epoch": 0.3, "grad_norm": 5.985342025756836, "learning_rate": 1.980708005118908e-06, "loss": 0.7519, "step": 2071 }, { "epoch": 0.3, "grad_norm": 5.6569600105285645, "learning_rate": 1.980677052006921e-06, "loss": 0.694, "step": 2072 }, { "epoch": 0.3, "grad_norm": 6.024604797363281, "learning_rate": 1.980646074325664e-06, "loss": 0.7359, "step": 2073 }, { "epoch": 0.3, "grad_norm": 6.363170146942139, "learning_rate": 1.9806150720759134e-06, "loss": 0.7369, "step": 2074 }, { "epoch": 0.3, "grad_norm": 6.131258010864258, "learning_rate": 1.980584045258445e-06, "loss": 0.7451, "step": 2075 }, { "epoch": 0.3, "grad_norm": 5.981840133666992, "learning_rate": 1.980552993874037e-06, "loss": 0.7576, "step": 2076 }, { "epoch": 0.3, "grad_norm": 5.9498610496521, "learning_rate": 1.9805219179234668e-06, "loss": 0.8103, "step": 2077 }, { "epoch": 0.3, "grad_norm": 5.695178985595703, "learning_rate": 1.980490817407513e-06, "loss": 0.7365, "step": 2078 }, { "epoch": 0.3, "grad_norm": 5.997190475463867, "learning_rate": 1.9804596923269554e-06, "loss": 0.7671, "step": 2079 }, { "epoch": 0.3, "grad_norm": 6.034230709075928, "learning_rate": 1.9804285426825727e-06, "loss": 0.751, "step": 2080 }, { "epoch": 0.3, "grad_norm": 5.4778618812561035, "learning_rate": 1.9803973684751464e-06, "loss": 0.7216, "step": 2081 }, { "epoch": 0.3, "grad_norm": 6.285027503967285, "learning_rate": 1.980366169705456e-06, "loss": 0.8119, "step": 2082 }, { "epoch": 0.3, "grad_norm": 5.526464939117432, "learning_rate": 1.9803349463742855e-06, "loss": 0.7799, "step": 2083 }, { "epoch": 0.3, "grad_norm": 6.491855621337891, "learning_rate": 1.980303698482415e-06, "loss": 0.7346, "step": 2084 }, { "epoch": 0.3, "grad_norm": 6.315433025360107, "learning_rate": 1.9802724260306283e-06, "loss": 0.8303, "step": 2085 }, { "epoch": 0.3, "grad_norm": 5.643115043640137, "learning_rate": 1.980241129019709e-06, "loss": 0.6948, "step": 2086 }, { "epoch": 0.3, "grad_norm": 6.180033206939697, "learning_rate": 1.9802098074504405e-06, "loss": 0.7792, "step": 2087 }, { "epoch": 0.3, "grad_norm": 5.990347862243652, "learning_rate": 1.9801784613236083e-06, "loss": 0.7843, "step": 2088 }, { "epoch": 0.3, "grad_norm": 6.1416425704956055, "learning_rate": 1.980147090639997e-06, "loss": 0.684, "step": 2089 }, { "epoch": 0.3, "grad_norm": 6.280259609222412, "learning_rate": 1.980115695400393e-06, "loss": 0.7355, "step": 2090 }, { "epoch": 0.3, "grad_norm": 6.926565647125244, "learning_rate": 1.9800842756055834e-06, "loss": 0.7662, "step": 2091 }, { "epoch": 0.3, "grad_norm": 6.815084457397461, "learning_rate": 1.980052831256354e-06, "loss": 0.8378, "step": 2092 }, { "epoch": 0.3, "grad_norm": 6.494785308837891, "learning_rate": 1.980021362353494e-06, "loss": 0.8168, "step": 2093 }, { "epoch": 0.3, "grad_norm": 5.770688056945801, "learning_rate": 1.9799898688977903e-06, "loss": 0.6907, "step": 2094 }, { "epoch": 0.3, "grad_norm": 6.050448894500732, "learning_rate": 1.9799583508900334e-06, "loss": 0.7132, "step": 2095 }, { "epoch": 0.3, "grad_norm": 6.048374176025391, "learning_rate": 1.979926808331012e-06, "loss": 0.7414, "step": 2096 }, { "epoch": 0.3, "grad_norm": 6.993400573730469, "learning_rate": 1.9798952412215168e-06, "loss": 0.8286, "step": 2097 }, { "epoch": 0.3, "grad_norm": 5.799144744873047, "learning_rate": 1.9798636495623384e-06, "loss": 0.7535, "step": 2098 }, { "epoch": 0.3, "grad_norm": 6.0269575119018555, "learning_rate": 1.9798320333542683e-06, "loss": 0.7456, "step": 2099 }, { "epoch": 0.3, "grad_norm": 5.879361629486084, "learning_rate": 1.9798003925980987e-06, "loss": 0.7491, "step": 2100 }, { "epoch": 0.3, "grad_norm": 5.753644943237305, "learning_rate": 1.9797687272946225e-06, "loss": 0.6757, "step": 2101 }, { "epoch": 0.3, "grad_norm": 5.555456638336182, "learning_rate": 1.9797370374446324e-06, "loss": 0.6841, "step": 2102 }, { "epoch": 0.31, "grad_norm": 6.358205318450928, "learning_rate": 1.979705323048923e-06, "loss": 0.7474, "step": 2103 }, { "epoch": 0.31, "grad_norm": 7.47428560256958, "learning_rate": 1.9796735841082887e-06, "loss": 0.8211, "step": 2104 }, { "epoch": 0.31, "grad_norm": 5.946042537689209, "learning_rate": 1.979641820623524e-06, "loss": 0.7311, "step": 2105 }, { "epoch": 0.31, "grad_norm": 6.706276893615723, "learning_rate": 1.979610032595426e-06, "loss": 0.7521, "step": 2106 }, { "epoch": 0.31, "grad_norm": 6.19135856628418, "learning_rate": 1.97957822002479e-06, "loss": 0.7433, "step": 2107 }, { "epoch": 0.31, "grad_norm": 6.077621936798096, "learning_rate": 1.979546382912413e-06, "loss": 0.7475, "step": 2108 }, { "epoch": 0.31, "grad_norm": 5.609902858734131, "learning_rate": 1.9795145212590933e-06, "loss": 0.6952, "step": 2109 }, { "epoch": 0.31, "grad_norm": 6.532132625579834, "learning_rate": 1.979482635065629e-06, "loss": 0.809, "step": 2110 }, { "epoch": 0.31, "grad_norm": 6.200955390930176, "learning_rate": 1.9794507243328186e-06, "loss": 0.7106, "step": 2111 }, { "epoch": 0.31, "grad_norm": 6.498031139373779, "learning_rate": 1.9794187890614615e-06, "loss": 0.7827, "step": 2112 }, { "epoch": 0.31, "grad_norm": 5.722691059112549, "learning_rate": 1.9793868292523582e-06, "loss": 0.75, "step": 2113 }, { "epoch": 0.31, "grad_norm": 6.385383605957031, "learning_rate": 1.9793548449063093e-06, "loss": 0.8016, "step": 2114 }, { "epoch": 0.31, "grad_norm": 5.723740577697754, "learning_rate": 1.979322836024116e-06, "loss": 0.7648, "step": 2115 }, { "epoch": 0.31, "grad_norm": 6.675297260284424, "learning_rate": 1.97929080260658e-06, "loss": 0.7209, "step": 2116 }, { "epoch": 0.31, "grad_norm": 6.143421649932861, "learning_rate": 1.9792587446545046e-06, "loss": 0.7917, "step": 2117 }, { "epoch": 0.31, "grad_norm": 6.519167423248291, "learning_rate": 1.979226662168692e-06, "loss": 0.7392, "step": 2118 }, { "epoch": 0.31, "grad_norm": 6.738956451416016, "learning_rate": 1.9791945551499467e-06, "loss": 0.7677, "step": 2119 }, { "epoch": 0.31, "grad_norm": 6.137755393981934, "learning_rate": 1.979162423599073e-06, "loss": 0.7189, "step": 2120 }, { "epoch": 0.31, "grad_norm": 6.65767765045166, "learning_rate": 1.9791302675168756e-06, "loss": 0.7395, "step": 2121 }, { "epoch": 0.31, "grad_norm": 6.107875823974609, "learning_rate": 1.97909808690416e-06, "loss": 0.7086, "step": 2122 }, { "epoch": 0.31, "grad_norm": 6.6147918701171875, "learning_rate": 1.9790658817617334e-06, "loss": 0.7509, "step": 2123 }, { "epoch": 0.31, "grad_norm": 5.760336875915527, "learning_rate": 1.9790336520904014e-06, "loss": 0.7166, "step": 2124 }, { "epoch": 0.31, "grad_norm": 5.877935409545898, "learning_rate": 1.9790013978909723e-06, "loss": 0.696, "step": 2125 }, { "epoch": 0.31, "grad_norm": 6.027682304382324, "learning_rate": 1.9789691191642536e-06, "loss": 0.814, "step": 2126 }, { "epoch": 0.31, "grad_norm": 5.474334716796875, "learning_rate": 1.9789368159110543e-06, "loss": 0.7075, "step": 2127 }, { "epoch": 0.31, "grad_norm": 5.909437656402588, "learning_rate": 1.9789044881321837e-06, "loss": 0.8213, "step": 2128 }, { "epoch": 0.31, "grad_norm": 6.065752029418945, "learning_rate": 1.978872135828452e-06, "loss": 0.7057, "step": 2129 }, { "epoch": 0.31, "grad_norm": 6.4986138343811035, "learning_rate": 1.978839759000669e-06, "loss": 0.7635, "step": 2130 }, { "epoch": 0.31, "grad_norm": 6.270759105682373, "learning_rate": 1.9788073576496467e-06, "loss": 0.7135, "step": 2131 }, { "epoch": 0.31, "grad_norm": 6.206775188446045, "learning_rate": 1.9787749317761962e-06, "loss": 0.7675, "step": 2132 }, { "epoch": 0.31, "grad_norm": 5.952346324920654, "learning_rate": 1.97874248138113e-06, "loss": 0.7094, "step": 2133 }, { "epoch": 0.31, "grad_norm": 6.051395893096924, "learning_rate": 1.9787100064652615e-06, "loss": 0.794, "step": 2134 }, { "epoch": 0.31, "grad_norm": 7.040628433227539, "learning_rate": 1.978677507029404e-06, "loss": 0.825, "step": 2135 }, { "epoch": 0.31, "grad_norm": 6.021763324737549, "learning_rate": 1.978644983074372e-06, "loss": 0.8252, "step": 2136 }, { "epoch": 0.31, "grad_norm": 6.0105695724487305, "learning_rate": 1.978612434600979e-06, "loss": 0.7216, "step": 2137 }, { "epoch": 0.31, "grad_norm": 6.995697975158691, "learning_rate": 1.9785798616100426e-06, "loss": 0.7958, "step": 2138 }, { "epoch": 0.31, "grad_norm": 6.140930652618408, "learning_rate": 1.9785472641023776e-06, "loss": 0.7294, "step": 2139 }, { "epoch": 0.31, "grad_norm": 6.046136379241943, "learning_rate": 1.9785146420788005e-06, "loss": 0.764, "step": 2140 }, { "epoch": 0.31, "grad_norm": 5.817548751831055, "learning_rate": 1.9784819955401294e-06, "loss": 0.7323, "step": 2141 }, { "epoch": 0.31, "grad_norm": 5.970839977264404, "learning_rate": 1.9784493244871813e-06, "loss": 0.6987, "step": 2142 }, { "epoch": 0.31, "grad_norm": 5.9328932762146, "learning_rate": 1.9784166289207754e-06, "loss": 0.6671, "step": 2143 }, { "epoch": 0.31, "grad_norm": 5.630074501037598, "learning_rate": 1.978383908841731e-06, "loss": 0.7834, "step": 2144 }, { "epoch": 0.31, "grad_norm": 6.031915187835693, "learning_rate": 1.978351164250867e-06, "loss": 0.6869, "step": 2145 }, { "epoch": 0.31, "grad_norm": 5.866945266723633, "learning_rate": 1.978318395149004e-06, "loss": 0.6648, "step": 2146 }, { "epoch": 0.31, "grad_norm": 5.523777008056641, "learning_rate": 1.978285601536964e-06, "loss": 0.7199, "step": 2147 }, { "epoch": 0.31, "grad_norm": 6.415323734283447, "learning_rate": 1.978252783415567e-06, "loss": 0.7179, "step": 2148 }, { "epoch": 0.31, "grad_norm": 7.505522727966309, "learning_rate": 1.978219940785637e-06, "loss": 0.7721, "step": 2149 }, { "epoch": 0.31, "grad_norm": 6.22361946105957, "learning_rate": 1.978187073647995e-06, "loss": 0.6462, "step": 2150 }, { "epoch": 0.31, "grad_norm": 6.361029148101807, "learning_rate": 1.9781541820034656e-06, "loss": 0.8328, "step": 2151 }, { "epoch": 0.31, "grad_norm": 6.630594253540039, "learning_rate": 1.9781212658528725e-06, "loss": 0.7097, "step": 2152 }, { "epoch": 0.31, "grad_norm": 5.951900482177734, "learning_rate": 1.97808832519704e-06, "loss": 0.7448, "step": 2153 }, { "epoch": 0.31, "grad_norm": 5.447243690490723, "learning_rate": 1.978055360036794e-06, "loss": 0.6321, "step": 2154 }, { "epoch": 0.31, "grad_norm": 6.328221321105957, "learning_rate": 1.97802237037296e-06, "loss": 0.7758, "step": 2155 }, { "epoch": 0.31, "grad_norm": 6.506606578826904, "learning_rate": 1.9779893562063646e-06, "loss": 0.7521, "step": 2156 }, { "epoch": 0.31, "grad_norm": 6.18219518661499, "learning_rate": 1.977956317537835e-06, "loss": 0.8252, "step": 2157 }, { "epoch": 0.31, "grad_norm": 6.308098316192627, "learning_rate": 1.9779232543681993e-06, "loss": 0.8199, "step": 2158 }, { "epoch": 0.31, "grad_norm": 7.42523193359375, "learning_rate": 1.977890166698285e-06, "loss": 0.7967, "step": 2159 }, { "epoch": 0.31, "grad_norm": 6.233387470245361, "learning_rate": 1.9778570545289217e-06, "loss": 0.7494, "step": 2160 }, { "epoch": 0.31, "grad_norm": 5.67948055267334, "learning_rate": 1.9778239178609385e-06, "loss": 0.7241, "step": 2161 }, { "epoch": 0.31, "grad_norm": 6.316986560821533, "learning_rate": 1.9777907566951657e-06, "loss": 0.8434, "step": 2162 }, { "epoch": 0.31, "grad_norm": 6.005177021026611, "learning_rate": 1.9777575710324345e-06, "loss": 0.6995, "step": 2163 }, { "epoch": 0.31, "grad_norm": 5.807883262634277, "learning_rate": 1.977724360873576e-06, "loss": 0.7903, "step": 2164 }, { "epoch": 0.31, "grad_norm": 5.844158172607422, "learning_rate": 1.9776911262194225e-06, "loss": 0.7128, "step": 2165 }, { "epoch": 0.31, "grad_norm": 5.570040225982666, "learning_rate": 1.9776578670708063e-06, "loss": 0.7137, "step": 2166 }, { "epoch": 0.31, "grad_norm": 5.766851425170898, "learning_rate": 1.9776245834285604e-06, "loss": 0.6914, "step": 2167 }, { "epoch": 0.31, "grad_norm": 6.805270195007324, "learning_rate": 1.9775912752935195e-06, "loss": 0.8055, "step": 2168 }, { "epoch": 0.31, "grad_norm": 6.185677528381348, "learning_rate": 1.9775579426665176e-06, "loss": 0.8263, "step": 2169 }, { "epoch": 0.31, "grad_norm": 6.468842029571533, "learning_rate": 1.9775245855483897e-06, "loss": 0.701, "step": 2170 }, { "epoch": 0.32, "grad_norm": 6.179013252258301, "learning_rate": 1.977491203939971e-06, "loss": 0.7649, "step": 2171 }, { "epoch": 0.32, "grad_norm": 6.251682281494141, "learning_rate": 1.9774577978420992e-06, "loss": 0.7586, "step": 2172 }, { "epoch": 0.32, "grad_norm": 6.992425918579102, "learning_rate": 1.9774243672556103e-06, "loss": 0.6998, "step": 2173 }, { "epoch": 0.32, "grad_norm": 6.049857139587402, "learning_rate": 1.977390912181342e-06, "loss": 0.7895, "step": 2174 }, { "epoch": 0.32, "grad_norm": 6.2520833015441895, "learning_rate": 1.9773574326201324e-06, "loss": 0.7255, "step": 2175 }, { "epoch": 0.32, "grad_norm": 6.008559703826904, "learning_rate": 1.9773239285728205e-06, "loss": 0.834, "step": 2176 }, { "epoch": 0.32, "grad_norm": 6.124277114868164, "learning_rate": 1.9772904000402455e-06, "loss": 0.8069, "step": 2177 }, { "epoch": 0.32, "grad_norm": 6.325206756591797, "learning_rate": 1.9772568470232476e-06, "loss": 0.7261, "step": 2178 }, { "epoch": 0.32, "grad_norm": 6.261075496673584, "learning_rate": 1.977223269522667e-06, "loss": 0.7875, "step": 2179 }, { "epoch": 0.32, "grad_norm": 5.9604291915893555, "learning_rate": 1.9771896675393455e-06, "loss": 0.7883, "step": 2180 }, { "epoch": 0.32, "grad_norm": 6.236969947814941, "learning_rate": 1.9771560410741247e-06, "loss": 0.7537, "step": 2181 }, { "epoch": 0.32, "grad_norm": 6.172711372375488, "learning_rate": 1.9771223901278467e-06, "loss": 0.7535, "step": 2182 }, { "epoch": 0.32, "grad_norm": 6.117133140563965, "learning_rate": 1.9770887147013547e-06, "loss": 0.7691, "step": 2183 }, { "epoch": 0.32, "grad_norm": 6.251873970031738, "learning_rate": 1.977055014795493e-06, "loss": 0.7309, "step": 2184 }, { "epoch": 0.32, "grad_norm": 6.527101993560791, "learning_rate": 1.9770212904111056e-06, "loss": 0.8001, "step": 2185 }, { "epoch": 0.32, "grad_norm": 6.5183868408203125, "learning_rate": 1.9769875415490365e-06, "loss": 0.7908, "step": 2186 }, { "epoch": 0.32, "grad_norm": 6.675915718078613, "learning_rate": 1.9769537682101327e-06, "loss": 0.7684, "step": 2187 }, { "epoch": 0.32, "grad_norm": 5.631196022033691, "learning_rate": 1.9769199703952394e-06, "loss": 0.738, "step": 2188 }, { "epoch": 0.32, "grad_norm": 6.891487121582031, "learning_rate": 1.9768861481052034e-06, "loss": 0.7531, "step": 2189 }, { "epoch": 0.32, "grad_norm": 6.209946155548096, "learning_rate": 1.976852301340872e-06, "loss": 0.7075, "step": 2190 }, { "epoch": 0.32, "grad_norm": 6.708797454833984, "learning_rate": 1.976818430103094e-06, "loss": 0.8441, "step": 2191 }, { "epoch": 0.32, "grad_norm": 6.147312164306641, "learning_rate": 1.976784534392717e-06, "loss": 0.7075, "step": 2192 }, { "epoch": 0.32, "grad_norm": 6.2967610359191895, "learning_rate": 1.9767506142105907e-06, "loss": 0.7519, "step": 2193 }, { "epoch": 0.32, "grad_norm": 6.1343560218811035, "learning_rate": 1.976716669557565e-06, "loss": 0.6861, "step": 2194 }, { "epoch": 0.32, "grad_norm": 5.8217573165893555, "learning_rate": 1.97668270043449e-06, "loss": 0.6788, "step": 2195 }, { "epoch": 0.32, "grad_norm": 6.695927619934082, "learning_rate": 1.976648706842217e-06, "loss": 0.7455, "step": 2196 }, { "epoch": 0.32, "grad_norm": 6.035162448883057, "learning_rate": 1.9766146887815973e-06, "loss": 0.7526, "step": 2197 }, { "epoch": 0.32, "grad_norm": 6.0476579666137695, "learning_rate": 1.9765806462534837e-06, "loss": 0.7661, "step": 2198 }, { "epoch": 0.32, "grad_norm": 6.154860973358154, "learning_rate": 1.9765465792587285e-06, "loss": 0.7239, "step": 2199 }, { "epoch": 0.32, "grad_norm": 5.938501358032227, "learning_rate": 1.9765124877981855e-06, "loss": 0.7238, "step": 2200 }, { "epoch": 0.32, "grad_norm": 5.894564151763916, "learning_rate": 1.976478371872709e-06, "loss": 0.705, "step": 2201 }, { "epoch": 0.32, "grad_norm": 5.727487564086914, "learning_rate": 1.9764442314831534e-06, "loss": 0.7107, "step": 2202 }, { "epoch": 0.32, "grad_norm": 7.592289447784424, "learning_rate": 1.976410066630374e-06, "loss": 0.6914, "step": 2203 }, { "epoch": 0.32, "grad_norm": 5.941160202026367, "learning_rate": 1.976375877315227e-06, "loss": 0.8034, "step": 2204 }, { "epoch": 0.32, "grad_norm": 6.697109699249268, "learning_rate": 1.9763416635385686e-06, "loss": 0.7256, "step": 2205 }, { "epoch": 0.32, "grad_norm": 6.037649154663086, "learning_rate": 1.9763074253012568e-06, "loss": 0.6655, "step": 2206 }, { "epoch": 0.32, "grad_norm": 6.110315322875977, "learning_rate": 1.9762731626041482e-06, "loss": 0.7372, "step": 2207 }, { "epoch": 0.32, "grad_norm": 6.2989397048950195, "learning_rate": 1.976238875448102e-06, "loss": 0.7584, "step": 2208 }, { "epoch": 0.32, "grad_norm": 7.069276332855225, "learning_rate": 1.976204563833977e-06, "loss": 0.8069, "step": 2209 }, { "epoch": 0.32, "grad_norm": 6.545389175415039, "learning_rate": 1.976170227762633e-06, "loss": 0.7283, "step": 2210 }, { "epoch": 0.32, "grad_norm": 5.7722930908203125, "learning_rate": 1.97613586723493e-06, "loss": 0.7365, "step": 2211 }, { "epoch": 0.32, "grad_norm": 6.215630531311035, "learning_rate": 1.9761014822517286e-06, "loss": 0.7458, "step": 2212 }, { "epoch": 0.32, "grad_norm": 6.06522798538208, "learning_rate": 1.9760670728138907e-06, "loss": 0.8792, "step": 2213 }, { "epoch": 0.32, "grad_norm": 6.826237678527832, "learning_rate": 1.9760326389222785e-06, "loss": 0.7984, "step": 2214 }, { "epoch": 0.32, "grad_norm": 5.897088050842285, "learning_rate": 1.975998180577754e-06, "loss": 0.787, "step": 2215 }, { "epoch": 0.32, "grad_norm": 6.644207954406738, "learning_rate": 1.975963697781181e-06, "loss": 0.7675, "step": 2216 }, { "epoch": 0.32, "grad_norm": 6.379863739013672, "learning_rate": 1.9759291905334235e-06, "loss": 0.8244, "step": 2217 }, { "epoch": 0.32, "grad_norm": 5.959098815917969, "learning_rate": 1.975894658835346e-06, "loss": 0.7288, "step": 2218 }, { "epoch": 0.32, "grad_norm": 6.751477241516113, "learning_rate": 1.975860102687813e-06, "loss": 0.7153, "step": 2219 }, { "epoch": 0.32, "grad_norm": 6.327678203582764, "learning_rate": 1.975825522091691e-06, "loss": 0.8201, "step": 2220 }, { "epoch": 0.32, "grad_norm": 6.168667793273926, "learning_rate": 1.9757909170478465e-06, "loss": 0.7719, "step": 2221 }, { "epoch": 0.32, "grad_norm": 5.831554889678955, "learning_rate": 1.9757562875571454e-06, "loss": 0.736, "step": 2222 }, { "epoch": 0.32, "grad_norm": 5.990253925323486, "learning_rate": 1.975721633620456e-06, "loss": 0.7598, "step": 2223 }, { "epoch": 0.32, "grad_norm": 6.029994487762451, "learning_rate": 1.9756869552386467e-06, "loss": 0.7706, "step": 2224 }, { "epoch": 0.32, "grad_norm": 6.347368240356445, "learning_rate": 1.9756522524125864e-06, "loss": 0.7098, "step": 2225 }, { "epoch": 0.32, "grad_norm": 5.884416103363037, "learning_rate": 1.9756175251431436e-06, "loss": 0.7408, "step": 2226 }, { "epoch": 0.32, "grad_norm": 5.985201835632324, "learning_rate": 1.975582773431189e-06, "loss": 0.7614, "step": 2227 }, { "epoch": 0.32, "grad_norm": 6.2308430671691895, "learning_rate": 1.9755479972775934e-06, "loss": 0.6969, "step": 2228 }, { "epoch": 0.32, "grad_norm": 5.418961048126221, "learning_rate": 1.9755131966832278e-06, "loss": 0.744, "step": 2229 }, { "epoch": 0.32, "grad_norm": 6.483511924743652, "learning_rate": 1.975478371648964e-06, "loss": 0.742, "step": 2230 }, { "epoch": 0.32, "grad_norm": 6.186492443084717, "learning_rate": 1.9754435221756745e-06, "loss": 0.6994, "step": 2231 }, { "epoch": 0.32, "grad_norm": 6.6888556480407715, "learning_rate": 1.9754086482642326e-06, "loss": 0.7738, "step": 2232 }, { "epoch": 0.32, "grad_norm": 7.670051097869873, "learning_rate": 1.975373749915512e-06, "loss": 0.8128, "step": 2233 }, { "epoch": 0.32, "grad_norm": 5.790083408355713, "learning_rate": 1.9753388271303866e-06, "loss": 0.7203, "step": 2234 }, { "epoch": 0.32, "grad_norm": 6.570247650146484, "learning_rate": 1.9753038799097318e-06, "loss": 0.727, "step": 2235 }, { "epoch": 0.32, "grad_norm": 5.917374610900879, "learning_rate": 1.9752689082544235e-06, "loss": 0.713, "step": 2236 }, { "epoch": 0.32, "grad_norm": 6.300371170043945, "learning_rate": 1.975233912165337e-06, "loss": 0.7411, "step": 2237 }, { "epoch": 0.32, "grad_norm": 5.969343185424805, "learning_rate": 1.975198891643349e-06, "loss": 0.7579, "step": 2238 }, { "epoch": 0.32, "grad_norm": 6.1542510986328125, "learning_rate": 1.9751638466893375e-06, "loss": 0.7154, "step": 2239 }, { "epoch": 0.33, "grad_norm": 5.6074538230896, "learning_rate": 1.9751287773041806e-06, "loss": 0.7442, "step": 2240 }, { "epoch": 0.33, "grad_norm": 6.960555076599121, "learning_rate": 1.9750936834887562e-06, "loss": 0.8271, "step": 2241 }, { "epoch": 0.33, "grad_norm": 6.063498020172119, "learning_rate": 1.975058565243944e-06, "loss": 0.8075, "step": 2242 }, { "epoch": 0.33, "grad_norm": 6.091700077056885, "learning_rate": 1.975023422570624e-06, "loss": 0.7527, "step": 2243 }, { "epoch": 0.33, "grad_norm": 6.385645389556885, "learning_rate": 1.9749882554696764e-06, "loss": 0.6516, "step": 2244 }, { "epoch": 0.33, "grad_norm": 5.535009384155273, "learning_rate": 1.974953063941982e-06, "loss": 0.6733, "step": 2245 }, { "epoch": 0.33, "grad_norm": 5.774445056915283, "learning_rate": 1.974917847988423e-06, "loss": 0.7242, "step": 2246 }, { "epoch": 0.33, "grad_norm": 6.174437522888184, "learning_rate": 1.974882607609881e-06, "loss": 0.6676, "step": 2247 }, { "epoch": 0.33, "grad_norm": 5.906003475189209, "learning_rate": 1.9748473428072395e-06, "loss": 0.7491, "step": 2248 }, { "epoch": 0.33, "grad_norm": 7.05725622177124, "learning_rate": 1.974812053581382e-06, "loss": 0.7416, "step": 2249 }, { "epoch": 0.33, "grad_norm": 5.9340033531188965, "learning_rate": 1.974776739933192e-06, "loss": 0.828, "step": 2250 }, { "epoch": 0.33, "grad_norm": 6.166973114013672, "learning_rate": 1.974741401863555e-06, "loss": 0.7427, "step": 2251 }, { "epoch": 0.33, "grad_norm": 5.951996326446533, "learning_rate": 1.9747060393733555e-06, "loss": 0.8072, "step": 2252 }, { "epoch": 0.33, "grad_norm": 5.628836631774902, "learning_rate": 1.9746706524634805e-06, "loss": 0.7166, "step": 2253 }, { "epoch": 0.33, "grad_norm": 5.554754257202148, "learning_rate": 1.9746352411348154e-06, "loss": 0.7178, "step": 2254 }, { "epoch": 0.33, "grad_norm": 5.935760021209717, "learning_rate": 1.9745998053882483e-06, "loss": 0.6963, "step": 2255 }, { "epoch": 0.33, "grad_norm": 5.708189010620117, "learning_rate": 1.9745643452246664e-06, "loss": 0.7728, "step": 2256 }, { "epoch": 0.33, "grad_norm": 6.4811201095581055, "learning_rate": 1.9745288606449584e-06, "loss": 0.7944, "step": 2257 }, { "epoch": 0.33, "grad_norm": 6.734376907348633, "learning_rate": 1.9744933516500135e-06, "loss": 0.7693, "step": 2258 }, { "epoch": 0.33, "grad_norm": 6.395251274108887, "learning_rate": 1.974457818240721e-06, "loss": 0.75, "step": 2259 }, { "epoch": 0.33, "grad_norm": 5.5698065757751465, "learning_rate": 1.9744222604179713e-06, "loss": 0.725, "step": 2260 }, { "epoch": 0.33, "grad_norm": 6.549002170562744, "learning_rate": 1.974386678182655e-06, "loss": 0.7394, "step": 2261 }, { "epoch": 0.33, "grad_norm": 6.719789028167725, "learning_rate": 1.9743510715356634e-06, "loss": 0.8144, "step": 2262 }, { "epoch": 0.33, "grad_norm": 5.851626873016357, "learning_rate": 1.974315440477889e-06, "loss": 0.7145, "step": 2263 }, { "epoch": 0.33, "grad_norm": 5.860550403594971, "learning_rate": 1.9742797850102243e-06, "loss": 0.6866, "step": 2264 }, { "epoch": 0.33, "grad_norm": 6.436464786529541, "learning_rate": 1.9742441051335626e-06, "loss": 0.7553, "step": 2265 }, { "epoch": 0.33, "grad_norm": 6.818467140197754, "learning_rate": 1.9742084008487982e-06, "loss": 0.8139, "step": 2266 }, { "epoch": 0.33, "grad_norm": 6.007131576538086, "learning_rate": 1.9741726721568252e-06, "loss": 0.6422, "step": 2267 }, { "epoch": 0.33, "grad_norm": 5.963828086853027, "learning_rate": 1.974136919058538e-06, "loss": 0.7536, "step": 2268 }, { "epoch": 0.33, "grad_norm": 5.945999622344971, "learning_rate": 1.9741011415548338e-06, "loss": 0.7522, "step": 2269 }, { "epoch": 0.33, "grad_norm": 6.364966869354248, "learning_rate": 1.9740653396466085e-06, "loss": 0.7764, "step": 2270 }, { "epoch": 0.33, "grad_norm": 5.403176784515381, "learning_rate": 1.974029513334758e-06, "loss": 0.7057, "step": 2271 }, { "epoch": 0.33, "grad_norm": 6.226526260375977, "learning_rate": 1.973993662620181e-06, "loss": 0.7804, "step": 2272 }, { "epoch": 0.33, "grad_norm": 5.946560859680176, "learning_rate": 1.9739577875037758e-06, "loss": 0.6846, "step": 2273 }, { "epoch": 0.33, "grad_norm": 6.339705467224121, "learning_rate": 1.97392188798644e-06, "loss": 0.7534, "step": 2274 }, { "epoch": 0.33, "grad_norm": 6.024383068084717, "learning_rate": 1.9738859640690745e-06, "loss": 0.7357, "step": 2275 }, { "epoch": 0.33, "grad_norm": 6.808294773101807, "learning_rate": 1.973850015752578e-06, "loss": 0.7146, "step": 2276 }, { "epoch": 0.33, "grad_norm": 6.455368518829346, "learning_rate": 1.9738140430378516e-06, "loss": 0.7573, "step": 2277 }, { "epoch": 0.33, "grad_norm": 5.890835762023926, "learning_rate": 1.973778045925797e-06, "loss": 0.7068, "step": 2278 }, { "epoch": 0.33, "grad_norm": 5.79408073425293, "learning_rate": 1.9737420244173156e-06, "loss": 0.7009, "step": 2279 }, { "epoch": 0.33, "grad_norm": 6.4364471435546875, "learning_rate": 1.97370597851331e-06, "loss": 0.6976, "step": 2280 }, { "epoch": 0.33, "grad_norm": 5.375070571899414, "learning_rate": 1.9736699082146834e-06, "loss": 0.7047, "step": 2281 }, { "epoch": 0.33, "grad_norm": 6.464372634887695, "learning_rate": 1.973633813522339e-06, "loss": 0.6934, "step": 2282 }, { "epoch": 0.33, "grad_norm": 6.719015121459961, "learning_rate": 1.9735976944371816e-06, "loss": 0.7245, "step": 2283 }, { "epoch": 0.33, "grad_norm": 6.522561550140381, "learning_rate": 1.9735615509601158e-06, "loss": 0.7727, "step": 2284 }, { "epoch": 0.33, "grad_norm": 6.309428691864014, "learning_rate": 1.9735253830920468e-06, "loss": 0.7631, "step": 2285 }, { "epoch": 0.33, "grad_norm": 5.954167366027832, "learning_rate": 1.9734891908338817e-06, "loss": 0.7901, "step": 2286 }, { "epoch": 0.33, "grad_norm": 7.066256523132324, "learning_rate": 1.9734529741865265e-06, "loss": 0.9248, "step": 2287 }, { "epoch": 0.33, "grad_norm": 7.260863780975342, "learning_rate": 1.973416733150889e-06, "loss": 0.752, "step": 2288 }, { "epoch": 0.33, "grad_norm": 6.781225681304932, "learning_rate": 1.973380467727876e-06, "loss": 0.7251, "step": 2289 }, { "epoch": 0.33, "grad_norm": 6.756564140319824, "learning_rate": 1.973344177918398e-06, "loss": 0.7345, "step": 2290 }, { "epoch": 0.33, "grad_norm": 6.327398777008057, "learning_rate": 1.973307863723363e-06, "loss": 0.7794, "step": 2291 }, { "epoch": 0.33, "grad_norm": 6.0371599197387695, "learning_rate": 1.97327152514368e-06, "loss": 0.8081, "step": 2292 }, { "epoch": 0.33, "grad_norm": 6.225196838378906, "learning_rate": 1.973235162180261e-06, "loss": 0.629, "step": 2293 }, { "epoch": 0.33, "grad_norm": 5.363438129425049, "learning_rate": 1.9731987748340166e-06, "loss": 0.6964, "step": 2294 }, { "epoch": 0.33, "grad_norm": 6.103430271148682, "learning_rate": 1.9731623631058575e-06, "loss": 0.8092, "step": 2295 }, { "epoch": 0.33, "grad_norm": 5.767352104187012, "learning_rate": 1.973125926996697e-06, "loss": 0.7795, "step": 2296 }, { "epoch": 0.33, "grad_norm": 6.622884273529053, "learning_rate": 1.9730894665074476e-06, "loss": 0.7509, "step": 2297 }, { "epoch": 0.33, "grad_norm": 5.538079261779785, "learning_rate": 1.973052981639023e-06, "loss": 0.7175, "step": 2298 }, { "epoch": 0.33, "grad_norm": 5.8291497230529785, "learning_rate": 1.973016472392337e-06, "loss": 0.7558, "step": 2299 }, { "epoch": 0.33, "grad_norm": 6.191126346588135, "learning_rate": 1.9729799387683036e-06, "loss": 0.8292, "step": 2300 }, { "epoch": 0.33, "grad_norm": 5.7103095054626465, "learning_rate": 1.972943380767839e-06, "loss": 0.7596, "step": 2301 }, { "epoch": 0.33, "grad_norm": 5.836813449859619, "learning_rate": 1.972906798391859e-06, "loss": 0.7213, "step": 2302 }, { "epoch": 0.33, "grad_norm": 6.9481353759765625, "learning_rate": 1.97287019164128e-06, "loss": 0.7449, "step": 2303 }, { "epoch": 0.33, "grad_norm": 6.0624542236328125, "learning_rate": 1.972833560517019e-06, "loss": 0.7554, "step": 2304 }, { "epoch": 0.33, "grad_norm": 6.5872039794921875, "learning_rate": 1.972796905019994e-06, "loss": 0.728, "step": 2305 }, { "epoch": 0.33, "grad_norm": 6.545022487640381, "learning_rate": 1.972760225151123e-06, "loss": 0.7262, "step": 2306 }, { "epoch": 0.33, "grad_norm": 5.631001949310303, "learning_rate": 1.9727235209113252e-06, "loss": 0.683, "step": 2307 }, { "epoch": 0.33, "grad_norm": 6.103191375732422, "learning_rate": 1.97268679230152e-06, "loss": 0.8157, "step": 2308 }, { "epoch": 0.34, "grad_norm": 5.971294403076172, "learning_rate": 1.9726500393226276e-06, "loss": 0.7146, "step": 2309 }, { "epoch": 0.34, "grad_norm": 6.110494613647461, "learning_rate": 1.972613261975569e-06, "loss": 0.7244, "step": 2310 }, { "epoch": 0.34, "grad_norm": 6.1422529220581055, "learning_rate": 1.9725764602612657e-06, "loss": 0.7591, "step": 2311 }, { "epoch": 0.34, "grad_norm": 6.72556209564209, "learning_rate": 1.9725396341806393e-06, "loss": 0.8002, "step": 2312 }, { "epoch": 0.34, "grad_norm": 6.136136531829834, "learning_rate": 1.9725027837346124e-06, "loss": 0.6597, "step": 2313 }, { "epoch": 0.34, "grad_norm": 5.791661262512207, "learning_rate": 1.9724659089241085e-06, "loss": 0.7938, "step": 2314 }, { "epoch": 0.34, "grad_norm": 5.857120037078857, "learning_rate": 1.972429009750051e-06, "loss": 0.6995, "step": 2315 }, { "epoch": 0.34, "grad_norm": 5.951839447021484, "learning_rate": 1.9723920862133653e-06, "loss": 0.6617, "step": 2316 }, { "epoch": 0.34, "grad_norm": 6.532242774963379, "learning_rate": 1.972355138314975e-06, "loss": 0.8842, "step": 2317 }, { "epoch": 0.34, "grad_norm": 5.686690330505371, "learning_rate": 1.972318166055808e-06, "loss": 0.6781, "step": 2318 }, { "epoch": 0.34, "grad_norm": 5.926341533660889, "learning_rate": 1.972281169436788e-06, "loss": 0.7422, "step": 2319 }, { "epoch": 0.34, "grad_norm": 6.356230735778809, "learning_rate": 1.9722441484588432e-06, "loss": 0.6954, "step": 2320 }, { "epoch": 0.34, "grad_norm": 6.031101226806641, "learning_rate": 1.9722071031229015e-06, "loss": 0.7669, "step": 2321 }, { "epoch": 0.34, "grad_norm": 6.020736217498779, "learning_rate": 1.9721700334298904e-06, "loss": 0.73, "step": 2322 }, { "epoch": 0.34, "grad_norm": 5.444368839263916, "learning_rate": 1.9721329393807382e-06, "loss": 0.7624, "step": 2323 }, { "epoch": 0.34, "grad_norm": 6.068140983581543, "learning_rate": 1.9720958209763755e-06, "loss": 0.6969, "step": 2324 }, { "epoch": 0.34, "grad_norm": 6.238653182983398, "learning_rate": 1.972058678217731e-06, "loss": 0.7904, "step": 2325 }, { "epoch": 0.34, "grad_norm": 6.368768215179443, "learning_rate": 1.972021511105736e-06, "loss": 0.7018, "step": 2326 }, { "epoch": 0.34, "grad_norm": 5.582211494445801, "learning_rate": 1.971984319641321e-06, "loss": 0.6611, "step": 2327 }, { "epoch": 0.34, "grad_norm": 6.678246974945068, "learning_rate": 1.9719471038254188e-06, "loss": 0.8208, "step": 2328 }, { "epoch": 0.34, "grad_norm": 5.734799861907959, "learning_rate": 1.9719098636589607e-06, "loss": 0.7657, "step": 2329 }, { "epoch": 0.34, "grad_norm": 6.678752422332764, "learning_rate": 1.9718725991428804e-06, "loss": 0.7195, "step": 2330 }, { "epoch": 0.34, "grad_norm": 6.858351707458496, "learning_rate": 1.9718353102781113e-06, "loss": 0.8157, "step": 2331 }, { "epoch": 0.34, "grad_norm": 6.205666542053223, "learning_rate": 1.971797997065587e-06, "loss": 0.7265, "step": 2332 }, { "epoch": 0.34, "grad_norm": 5.734076023101807, "learning_rate": 1.9717606595062437e-06, "loss": 0.6926, "step": 2333 }, { "epoch": 0.34, "grad_norm": 6.495563983917236, "learning_rate": 1.9717232976010154e-06, "loss": 0.7601, "step": 2334 }, { "epoch": 0.34, "grad_norm": 6.789012432098389, "learning_rate": 1.971685911350839e-06, "loss": 0.7416, "step": 2335 }, { "epoch": 0.34, "grad_norm": 6.931628227233887, "learning_rate": 1.9716485007566508e-06, "loss": 0.7236, "step": 2336 }, { "epoch": 0.34, "grad_norm": 6.17257022857666, "learning_rate": 1.9716110658193884e-06, "loss": 0.7896, "step": 2337 }, { "epoch": 0.34, "grad_norm": 6.434167385101318, "learning_rate": 1.9715736065399895e-06, "loss": 0.745, "step": 2338 }, { "epoch": 0.34, "grad_norm": 5.790990829467773, "learning_rate": 1.9715361229193925e-06, "loss": 0.628, "step": 2339 }, { "epoch": 0.34, "grad_norm": 6.32405424118042, "learning_rate": 1.971498614958536e-06, "loss": 0.7672, "step": 2340 }, { "epoch": 0.34, "grad_norm": 5.737856388092041, "learning_rate": 1.9714610826583608e-06, "loss": 0.7696, "step": 2341 }, { "epoch": 0.34, "grad_norm": 5.8764328956604, "learning_rate": 1.9714235260198065e-06, "loss": 0.7475, "step": 2342 }, { "epoch": 0.34, "grad_norm": 5.799693584442139, "learning_rate": 1.971385945043814e-06, "loss": 0.739, "step": 2343 }, { "epoch": 0.34, "grad_norm": 5.837165832519531, "learning_rate": 1.971348339731325e-06, "loss": 0.6814, "step": 2344 }, { "epoch": 0.34, "grad_norm": 6.706725597381592, "learning_rate": 1.9713107100832817e-06, "loss": 0.7673, "step": 2345 }, { "epoch": 0.34, "grad_norm": 6.228476047515869, "learning_rate": 1.971273056100627e-06, "loss": 0.7564, "step": 2346 }, { "epoch": 0.34, "grad_norm": 6.877481937408447, "learning_rate": 1.9712353777843036e-06, "loss": 0.7669, "step": 2347 }, { "epoch": 0.34, "grad_norm": 6.523730278015137, "learning_rate": 1.9711976751352563e-06, "loss": 0.7544, "step": 2348 }, { "epoch": 0.34, "grad_norm": 5.919323444366455, "learning_rate": 1.9711599481544288e-06, "loss": 0.6396, "step": 2349 }, { "epoch": 0.34, "grad_norm": 5.90734338760376, "learning_rate": 1.971122196842767e-06, "loss": 0.6991, "step": 2350 }, { "epoch": 0.34, "grad_norm": 5.4604387283325195, "learning_rate": 1.9710844212012167e-06, "loss": 0.666, "step": 2351 }, { "epoch": 0.34, "grad_norm": 5.576503276824951, "learning_rate": 1.9710466212307236e-06, "loss": 0.713, "step": 2352 }, { "epoch": 0.34, "grad_norm": 5.901762962341309, "learning_rate": 1.9710087969322356e-06, "loss": 0.8424, "step": 2353 }, { "epoch": 0.34, "grad_norm": 5.360905647277832, "learning_rate": 1.9709709483066995e-06, "loss": 0.7114, "step": 2354 }, { "epoch": 0.34, "grad_norm": 5.528641700744629, "learning_rate": 1.9709330753550642e-06, "loss": 0.6765, "step": 2355 }, { "epoch": 0.34, "grad_norm": 5.912535190582275, "learning_rate": 1.9708951780782787e-06, "loss": 0.7495, "step": 2356 }, { "epoch": 0.34, "grad_norm": 5.718992710113525, "learning_rate": 1.9708572564772916e-06, "loss": 0.7605, "step": 2357 }, { "epoch": 0.34, "grad_norm": 6.860738754272461, "learning_rate": 1.9708193105530534e-06, "loss": 0.7739, "step": 2358 }, { "epoch": 0.34, "grad_norm": 6.197443962097168, "learning_rate": 1.970781340306515e-06, "loss": 0.7795, "step": 2359 }, { "epoch": 0.34, "grad_norm": 5.539884090423584, "learning_rate": 1.9707433457386276e-06, "loss": 0.7456, "step": 2360 }, { "epoch": 0.34, "grad_norm": 6.1504998207092285, "learning_rate": 1.9707053268503424e-06, "loss": 0.7671, "step": 2361 }, { "epoch": 0.34, "grad_norm": 5.583695888519287, "learning_rate": 1.970667283642613e-06, "loss": 0.7643, "step": 2362 }, { "epoch": 0.34, "grad_norm": 6.5727057456970215, "learning_rate": 1.9706292161163916e-06, "loss": 0.6929, "step": 2363 }, { "epoch": 0.34, "grad_norm": 5.997016429901123, "learning_rate": 1.9705911242726326e-06, "loss": 0.6749, "step": 2364 }, { "epoch": 0.34, "grad_norm": 5.968931674957275, "learning_rate": 1.97055300811229e-06, "loss": 0.764, "step": 2365 }, { "epoch": 0.34, "grad_norm": 6.501303672790527, "learning_rate": 1.9705148676363185e-06, "loss": 0.8266, "step": 2366 }, { "epoch": 0.34, "grad_norm": 6.306561470031738, "learning_rate": 1.9704767028456743e-06, "loss": 0.7719, "step": 2367 }, { "epoch": 0.34, "grad_norm": 6.1037211418151855, "learning_rate": 1.9704385137413127e-06, "loss": 0.66, "step": 2368 }, { "epoch": 0.34, "grad_norm": 5.854272365570068, "learning_rate": 1.9704003003241913e-06, "loss": 0.7318, "step": 2369 }, { "epoch": 0.34, "grad_norm": 6.0832600593566895, "learning_rate": 1.970362062595267e-06, "loss": 0.7551, "step": 2370 }, { "epoch": 0.34, "grad_norm": 6.312249660491943, "learning_rate": 1.9703238005554977e-06, "loss": 0.7844, "step": 2371 }, { "epoch": 0.34, "grad_norm": 6.61041259765625, "learning_rate": 1.9702855142058427e-06, "loss": 0.8352, "step": 2372 }, { "epoch": 0.34, "grad_norm": 6.088423728942871, "learning_rate": 1.97024720354726e-06, "loss": 0.8696, "step": 2373 }, { "epoch": 0.34, "grad_norm": 6.386815071105957, "learning_rate": 1.9702088685807107e-06, "loss": 0.8349, "step": 2374 }, { "epoch": 0.34, "grad_norm": 6.538321495056152, "learning_rate": 1.9701705093071543e-06, "loss": 0.7655, "step": 2375 }, { "epoch": 0.34, "grad_norm": 6.421764850616455, "learning_rate": 1.970132125727552e-06, "loss": 0.7038, "step": 2376 }, { "epoch": 0.34, "grad_norm": 6.067080974578857, "learning_rate": 1.970093717842866e-06, "loss": 0.7807, "step": 2377 }, { "epoch": 0.35, "grad_norm": 5.736795902252197, "learning_rate": 1.970055285654058e-06, "loss": 0.7016, "step": 2378 }, { "epoch": 0.35, "grad_norm": 6.078934192657471, "learning_rate": 1.970016829162091e-06, "loss": 0.7459, "step": 2379 }, { "epoch": 0.35, "grad_norm": 5.972874164581299, "learning_rate": 1.9699783483679282e-06, "loss": 0.6993, "step": 2380 }, { "epoch": 0.35, "grad_norm": 5.831899166107178, "learning_rate": 1.9699398432725343e-06, "loss": 0.7057, "step": 2381 }, { "epoch": 0.35, "grad_norm": 6.044556617736816, "learning_rate": 1.9699013138768735e-06, "loss": 0.7196, "step": 2382 }, { "epoch": 0.35, "grad_norm": 6.3226165771484375, "learning_rate": 1.969862760181911e-06, "loss": 0.7308, "step": 2383 }, { "epoch": 0.35, "grad_norm": 5.832255840301514, "learning_rate": 1.969824182188613e-06, "loss": 0.708, "step": 2384 }, { "epoch": 0.35, "grad_norm": 6.846004009246826, "learning_rate": 1.969785579897946e-06, "loss": 0.8789, "step": 2385 }, { "epoch": 0.35, "grad_norm": 6.743788242340088, "learning_rate": 1.969746953310877e-06, "loss": 0.7711, "step": 2386 }, { "epoch": 0.35, "grad_norm": 6.393914222717285, "learning_rate": 1.969708302428374e-06, "loss": 0.7357, "step": 2387 }, { "epoch": 0.35, "grad_norm": 5.587921619415283, "learning_rate": 1.9696696272514045e-06, "loss": 0.7661, "step": 2388 }, { "epoch": 0.35, "grad_norm": 6.766398906707764, "learning_rate": 1.9696309277809388e-06, "loss": 0.7438, "step": 2389 }, { "epoch": 0.35, "grad_norm": 5.8413190841674805, "learning_rate": 1.969592204017945e-06, "loss": 0.643, "step": 2390 }, { "epoch": 0.35, "grad_norm": 5.852265357971191, "learning_rate": 1.9695534559633945e-06, "loss": 0.8329, "step": 2391 }, { "epoch": 0.35, "grad_norm": 5.835826873779297, "learning_rate": 1.9695146836182573e-06, "loss": 0.7595, "step": 2392 }, { "epoch": 0.35, "grad_norm": 6.187107086181641, "learning_rate": 1.9694758869835047e-06, "loss": 0.6838, "step": 2393 }, { "epoch": 0.35, "grad_norm": 6.077962398529053, "learning_rate": 1.9694370660601094e-06, "loss": 0.7844, "step": 2394 }, { "epoch": 0.35, "grad_norm": 5.64216423034668, "learning_rate": 1.9693982208490434e-06, "loss": 0.6915, "step": 2395 }, { "epoch": 0.35, "grad_norm": 5.479796409606934, "learning_rate": 1.96935935135128e-06, "loss": 0.7116, "step": 2396 }, { "epoch": 0.35, "grad_norm": 5.588947772979736, "learning_rate": 1.969320457567793e-06, "loss": 0.6605, "step": 2397 }, { "epoch": 0.35, "grad_norm": 6.225320339202881, "learning_rate": 1.9692815394995576e-06, "loss": 0.7322, "step": 2398 }, { "epoch": 0.35, "grad_norm": 5.649126052856445, "learning_rate": 1.969242597147547e-06, "loss": 0.712, "step": 2399 }, { "epoch": 0.35, "grad_norm": 6.6117634773254395, "learning_rate": 1.969203630512739e-06, "loss": 0.7819, "step": 2400 }, { "epoch": 0.35, "grad_norm": 5.885505676269531, "learning_rate": 1.9691646395961086e-06, "loss": 0.7139, "step": 2401 }, { "epoch": 0.35, "grad_norm": 5.685401916503906, "learning_rate": 1.969125624398633e-06, "loss": 0.648, "step": 2402 }, { "epoch": 0.35, "grad_norm": 5.890738487243652, "learning_rate": 1.969086584921289e-06, "loss": 0.6835, "step": 2403 }, { "epoch": 0.35, "grad_norm": 5.99753475189209, "learning_rate": 1.9690475211650554e-06, "loss": 0.7474, "step": 2404 }, { "epoch": 0.35, "grad_norm": 5.998021602630615, "learning_rate": 1.9690084331309107e-06, "loss": 0.7033, "step": 2405 }, { "epoch": 0.35, "grad_norm": 5.586967468261719, "learning_rate": 1.9689693208198345e-06, "loss": 0.6921, "step": 2406 }, { "epoch": 0.35, "grad_norm": 5.803455352783203, "learning_rate": 1.9689301842328063e-06, "loss": 0.706, "step": 2407 }, { "epoch": 0.35, "grad_norm": 5.958921909332275, "learning_rate": 1.968891023370806e-06, "loss": 0.7324, "step": 2408 }, { "epoch": 0.35, "grad_norm": 6.520224094390869, "learning_rate": 1.968851838234816e-06, "loss": 0.6426, "step": 2409 }, { "epoch": 0.35, "grad_norm": 6.359391689300537, "learning_rate": 1.9688126288258175e-06, "loss": 0.7136, "step": 2410 }, { "epoch": 0.35, "grad_norm": 7.055924415588379, "learning_rate": 1.9687733951447925e-06, "loss": 0.8282, "step": 2411 }, { "epoch": 0.35, "grad_norm": 5.694513320922852, "learning_rate": 1.9687341371927245e-06, "loss": 0.6796, "step": 2412 }, { "epoch": 0.35, "grad_norm": 6.309516429901123, "learning_rate": 1.9686948549705965e-06, "loss": 0.7133, "step": 2413 }, { "epoch": 0.35, "grad_norm": 7.224537372589111, "learning_rate": 1.9686555484793927e-06, "loss": 0.6708, "step": 2414 }, { "epoch": 0.35, "grad_norm": 7.333780765533447, "learning_rate": 1.9686162177200982e-06, "loss": 0.8822, "step": 2415 }, { "epoch": 0.35, "grad_norm": 5.9393768310546875, "learning_rate": 1.968576862693698e-06, "loss": 0.7118, "step": 2416 }, { "epoch": 0.35, "grad_norm": 6.474870204925537, "learning_rate": 1.9685374834011785e-06, "loss": 0.8041, "step": 2417 }, { "epoch": 0.35, "grad_norm": 6.677377223968506, "learning_rate": 1.968498079843526e-06, "loss": 0.7145, "step": 2418 }, { "epoch": 0.35, "grad_norm": 6.408022403717041, "learning_rate": 1.968458652021728e-06, "loss": 0.6934, "step": 2419 }, { "epoch": 0.35, "grad_norm": 5.9171929359436035, "learning_rate": 1.968419199936772e-06, "loss": 0.7367, "step": 2420 }, { "epoch": 0.35, "grad_norm": 5.751127243041992, "learning_rate": 1.968379723589646e-06, "loss": 0.696, "step": 2421 }, { "epoch": 0.35, "grad_norm": 6.165517330169678, "learning_rate": 1.96834022298134e-06, "loss": 0.7036, "step": 2422 }, { "epoch": 0.35, "grad_norm": 6.447432041168213, "learning_rate": 1.9683006981128426e-06, "loss": 0.733, "step": 2423 }, { "epoch": 0.35, "grad_norm": 7.142099857330322, "learning_rate": 1.968261148985145e-06, "loss": 0.7287, "step": 2424 }, { "epoch": 0.35, "grad_norm": 6.3785400390625, "learning_rate": 1.9682215755992374e-06, "loss": 0.7742, "step": 2425 }, { "epoch": 0.35, "grad_norm": 5.6487345695495605, "learning_rate": 1.968181977956111e-06, "loss": 0.761, "step": 2426 }, { "epoch": 0.35, "grad_norm": 5.703300476074219, "learning_rate": 1.968142356056759e-06, "loss": 0.8071, "step": 2427 }, { "epoch": 0.35, "grad_norm": 6.448052406311035, "learning_rate": 1.968102709902173e-06, "loss": 0.8049, "step": 2428 }, { "epoch": 0.35, "grad_norm": 5.26794958114624, "learning_rate": 1.9680630394933464e-06, "loss": 0.6909, "step": 2429 }, { "epoch": 0.35, "grad_norm": 5.485732078552246, "learning_rate": 1.9680233448312735e-06, "loss": 0.7161, "step": 2430 }, { "epoch": 0.35, "grad_norm": 5.649525165557861, "learning_rate": 1.9679836259169486e-06, "loss": 0.7208, "step": 2431 }, { "epoch": 0.35, "grad_norm": 6.285207271575928, "learning_rate": 1.9679438827513665e-06, "loss": 0.6597, "step": 2432 }, { "epoch": 0.35, "grad_norm": 6.448462963104248, "learning_rate": 1.9679041153355238e-06, "loss": 0.7973, "step": 2433 }, { "epoch": 0.35, "grad_norm": 5.716060638427734, "learning_rate": 1.9678643236704157e-06, "loss": 0.6599, "step": 2434 }, { "epoch": 0.35, "grad_norm": 6.510093688964844, "learning_rate": 1.9678245077570396e-06, "loss": 0.7475, "step": 2435 }, { "epoch": 0.35, "grad_norm": 6.022294998168945, "learning_rate": 1.967784667596393e-06, "loss": 0.739, "step": 2436 }, { "epoch": 0.35, "grad_norm": 6.301102638244629, "learning_rate": 1.967744803189474e-06, "loss": 0.7356, "step": 2437 }, { "epoch": 0.35, "grad_norm": 7.208588123321533, "learning_rate": 1.967704914537281e-06, "loss": 0.8, "step": 2438 }, { "epoch": 0.35, "grad_norm": 5.9646501541137695, "learning_rate": 1.9676650016408145e-06, "loss": 0.7771, "step": 2439 }, { "epoch": 0.35, "grad_norm": 7.236777305603027, "learning_rate": 1.9676250645010732e-06, "loss": 0.6719, "step": 2440 }, { "epoch": 0.35, "grad_norm": 6.006217956542969, "learning_rate": 1.9675851031190584e-06, "loss": 0.7423, "step": 2441 }, { "epoch": 0.35, "grad_norm": 6.448395252227783, "learning_rate": 1.967545117495771e-06, "loss": 0.8061, "step": 2442 }, { "epoch": 0.35, "grad_norm": 6.039913177490234, "learning_rate": 1.9675051076322125e-06, "loss": 0.7837, "step": 2443 }, { "epoch": 0.35, "grad_norm": 6.0760016441345215, "learning_rate": 1.967465073529386e-06, "loss": 0.6982, "step": 2444 }, { "epoch": 0.35, "grad_norm": 6.202208518981934, "learning_rate": 1.9674250151882935e-06, "loss": 0.6778, "step": 2445 }, { "epoch": 0.35, "grad_norm": 6.706369876861572, "learning_rate": 1.9673849326099395e-06, "loss": 0.7641, "step": 2446 }, { "epoch": 0.36, "grad_norm": 6.5793256759643555, "learning_rate": 1.9673448257953276e-06, "loss": 0.7638, "step": 2447 }, { "epoch": 0.36, "grad_norm": 5.55059814453125, "learning_rate": 1.967304694745463e-06, "loss": 0.7506, "step": 2448 }, { "epoch": 0.36, "grad_norm": 5.8960490226745605, "learning_rate": 1.9672645394613513e-06, "loss": 0.7001, "step": 2449 }, { "epoch": 0.36, "grad_norm": 6.356700420379639, "learning_rate": 1.967224359943998e-06, "loss": 0.7664, "step": 2450 }, { "epoch": 0.36, "grad_norm": 6.096324920654297, "learning_rate": 1.9671841561944094e-06, "loss": 0.7692, "step": 2451 }, { "epoch": 0.36, "grad_norm": 6.457337379455566, "learning_rate": 1.967143928213594e-06, "loss": 0.7387, "step": 2452 }, { "epoch": 0.36, "grad_norm": 6.823352336883545, "learning_rate": 1.9671036760025584e-06, "loss": 0.7378, "step": 2453 }, { "epoch": 0.36, "grad_norm": 6.82265567779541, "learning_rate": 1.9670633995623122e-06, "loss": 0.7918, "step": 2454 }, { "epoch": 0.36, "grad_norm": 6.2067484855651855, "learning_rate": 1.9670230988938634e-06, "loss": 0.7769, "step": 2455 }, { "epoch": 0.36, "grad_norm": 5.427854061126709, "learning_rate": 1.966982773998222e-06, "loss": 0.7047, "step": 2456 }, { "epoch": 0.36, "grad_norm": 5.739419460296631, "learning_rate": 1.9669424248763982e-06, "loss": 0.7233, "step": 2457 }, { "epoch": 0.36, "grad_norm": 5.9398369789123535, "learning_rate": 1.9669020515294033e-06, "loss": 0.7175, "step": 2458 }, { "epoch": 0.36, "grad_norm": 6.465791702270508, "learning_rate": 1.966861653958249e-06, "loss": 0.697, "step": 2459 }, { "epoch": 0.36, "grad_norm": 6.425656795501709, "learning_rate": 1.9668212321639465e-06, "loss": 0.6977, "step": 2460 }, { "epoch": 0.36, "grad_norm": 5.981096267700195, "learning_rate": 1.966780786147509e-06, "loss": 0.6836, "step": 2461 }, { "epoch": 0.36, "grad_norm": 5.8948211669921875, "learning_rate": 1.9667403159099497e-06, "loss": 0.7124, "step": 2462 }, { "epoch": 0.36, "grad_norm": 5.944993495941162, "learning_rate": 1.966699821452283e-06, "loss": 0.7094, "step": 2463 }, { "epoch": 0.36, "grad_norm": 6.215227127075195, "learning_rate": 1.9666593027755223e-06, "loss": 0.7548, "step": 2464 }, { "epoch": 0.36, "grad_norm": 6.015537261962891, "learning_rate": 1.966618759880684e-06, "loss": 0.6897, "step": 2465 }, { "epoch": 0.36, "grad_norm": 5.412454605102539, "learning_rate": 1.966578192768783e-06, "loss": 0.7578, "step": 2466 }, { "epoch": 0.36, "grad_norm": 5.956530570983887, "learning_rate": 1.966537601440836e-06, "loss": 0.7014, "step": 2467 }, { "epoch": 0.36, "grad_norm": 5.398041725158691, "learning_rate": 1.9664969858978597e-06, "loss": 0.6676, "step": 2468 }, { "epoch": 0.36, "grad_norm": 6.166791915893555, "learning_rate": 1.9664563461408717e-06, "loss": 0.7015, "step": 2469 }, { "epoch": 0.36, "grad_norm": 5.780527591705322, "learning_rate": 1.966415682170891e-06, "loss": 0.7291, "step": 2470 }, { "epoch": 0.36, "grad_norm": 6.394913196563721, "learning_rate": 1.966374993988935e-06, "loss": 0.8473, "step": 2471 }, { "epoch": 0.36, "grad_norm": 5.743443489074707, "learning_rate": 1.9663342815960235e-06, "loss": 0.6882, "step": 2472 }, { "epoch": 0.36, "grad_norm": 5.931828022003174, "learning_rate": 1.966293544993177e-06, "loss": 0.7406, "step": 2473 }, { "epoch": 0.36, "grad_norm": 5.733277320861816, "learning_rate": 1.966252784181416e-06, "loss": 0.7424, "step": 2474 }, { "epoch": 0.36, "grad_norm": 6.634463310241699, "learning_rate": 1.966211999161761e-06, "loss": 0.7888, "step": 2475 }, { "epoch": 0.36, "grad_norm": 5.685513019561768, "learning_rate": 1.9661711899352352e-06, "loss": 0.6488, "step": 2476 }, { "epoch": 0.36, "grad_norm": 6.119494438171387, "learning_rate": 1.9661303565028594e-06, "loss": 0.6848, "step": 2477 }, { "epoch": 0.36, "grad_norm": 6.711238861083984, "learning_rate": 1.9660894988656574e-06, "loss": 0.8205, "step": 2478 }, { "epoch": 0.36, "grad_norm": 5.809140682220459, "learning_rate": 1.966048617024653e-06, "loss": 0.7414, "step": 2479 }, { "epoch": 0.36, "grad_norm": 6.33027982711792, "learning_rate": 1.9660077109808698e-06, "loss": 0.7717, "step": 2480 }, { "epoch": 0.36, "grad_norm": 6.988955497741699, "learning_rate": 1.965966780735333e-06, "loss": 0.7872, "step": 2481 }, { "epoch": 0.36, "grad_norm": 6.007566928863525, "learning_rate": 1.965925826289068e-06, "loss": 0.7339, "step": 2482 }, { "epoch": 0.36, "grad_norm": 6.654463768005371, "learning_rate": 1.965884847643101e-06, "loss": 0.6382, "step": 2483 }, { "epoch": 0.36, "grad_norm": 6.912186622619629, "learning_rate": 1.9658438447984587e-06, "loss": 0.7535, "step": 2484 }, { "epoch": 0.36, "grad_norm": 5.662993431091309, "learning_rate": 1.965802817756168e-06, "loss": 0.7699, "step": 2485 }, { "epoch": 0.36, "grad_norm": 5.620518207550049, "learning_rate": 1.965761766517257e-06, "loss": 0.7175, "step": 2486 }, { "epoch": 0.36, "grad_norm": 6.936728000640869, "learning_rate": 1.9657206910827546e-06, "loss": 0.7045, "step": 2487 }, { "epoch": 0.36, "grad_norm": 5.587792873382568, "learning_rate": 1.965679591453689e-06, "loss": 0.697, "step": 2488 }, { "epoch": 0.36, "grad_norm": 5.987931251525879, "learning_rate": 1.9656384676310905e-06, "loss": 0.6332, "step": 2489 }, { "epoch": 0.36, "grad_norm": 5.959377765655518, "learning_rate": 1.9655973196159886e-06, "loss": 0.7836, "step": 2490 }, { "epoch": 0.36, "grad_norm": 6.711804389953613, "learning_rate": 1.965556147409415e-06, "loss": 0.8135, "step": 2491 }, { "epoch": 0.36, "grad_norm": 8.220625877380371, "learning_rate": 1.9655149510124016e-06, "loss": 0.7719, "step": 2492 }, { "epoch": 0.36, "grad_norm": 7.100621223449707, "learning_rate": 1.9654737304259795e-06, "loss": 0.7592, "step": 2493 }, { "epoch": 0.36, "grad_norm": 6.668471813201904, "learning_rate": 1.965432485651182e-06, "loss": 0.7105, "step": 2494 }, { "epoch": 0.36, "grad_norm": 6.210655689239502, "learning_rate": 1.965391216689042e-06, "loss": 0.7005, "step": 2495 }, { "epoch": 0.36, "grad_norm": 5.769625186920166, "learning_rate": 1.9653499235405934e-06, "loss": 0.8041, "step": 2496 }, { "epoch": 0.36, "grad_norm": 5.796478748321533, "learning_rate": 1.9653086062068714e-06, "loss": 0.7706, "step": 2497 }, { "epoch": 0.36, "grad_norm": 6.618143081665039, "learning_rate": 1.9652672646889104e-06, "loss": 0.7341, "step": 2498 }, { "epoch": 0.36, "grad_norm": 5.966973304748535, "learning_rate": 1.965225898987747e-06, "loss": 0.723, "step": 2499 }, { "epoch": 0.36, "grad_norm": 5.67172384262085, "learning_rate": 1.9651845091044166e-06, "loss": 0.7194, "step": 2500 }, { "epoch": 0.36, "grad_norm": 5.946873188018799, "learning_rate": 1.965143095039957e-06, "loss": 0.7869, "step": 2501 }, { "epoch": 0.36, "grad_norm": 6.168841361999512, "learning_rate": 1.9651016567954047e-06, "loss": 0.8128, "step": 2502 }, { "epoch": 0.36, "grad_norm": 6.073578357696533, "learning_rate": 1.965060194371799e-06, "loss": 0.668, "step": 2503 }, { "epoch": 0.36, "grad_norm": 5.543354034423828, "learning_rate": 1.9650187077701783e-06, "loss": 0.7513, "step": 2504 }, { "epoch": 0.36, "grad_norm": 5.4965105056762695, "learning_rate": 1.9649771969915813e-06, "loss": 0.7029, "step": 2505 }, { "epoch": 0.36, "grad_norm": 5.740621089935303, "learning_rate": 1.964935662037049e-06, "loss": 0.6656, "step": 2506 }, { "epoch": 0.36, "grad_norm": 5.17333459854126, "learning_rate": 1.9648941029076217e-06, "loss": 0.7066, "step": 2507 }, { "epoch": 0.36, "grad_norm": 5.893084526062012, "learning_rate": 1.96485251960434e-06, "loss": 0.6767, "step": 2508 }, { "epoch": 0.36, "grad_norm": 6.106215476989746, "learning_rate": 1.9648109121282463e-06, "loss": 0.7541, "step": 2509 }, { "epoch": 0.36, "grad_norm": 5.809657096862793, "learning_rate": 1.9647692804803826e-06, "loss": 0.6726, "step": 2510 }, { "epoch": 0.36, "grad_norm": 6.466381072998047, "learning_rate": 1.9647276246617926e-06, "loss": 0.745, "step": 2511 }, { "epoch": 0.36, "grad_norm": 6.615090847015381, "learning_rate": 1.9646859446735194e-06, "loss": 0.7174, "step": 2512 }, { "epoch": 0.36, "grad_norm": 5.49883508682251, "learning_rate": 1.9646442405166073e-06, "loss": 0.6839, "step": 2513 }, { "epoch": 0.36, "grad_norm": 6.082683563232422, "learning_rate": 1.9646025121921007e-06, "loss": 0.6856, "step": 2514 }, { "epoch": 0.36, "grad_norm": 6.344442844390869, "learning_rate": 1.9645607597010462e-06, "loss": 0.8524, "step": 2515 }, { "epoch": 0.37, "grad_norm": 6.310258865356445, "learning_rate": 1.9645189830444882e-06, "loss": 0.7261, "step": 2516 }, { "epoch": 0.37, "grad_norm": 6.941650867462158, "learning_rate": 1.9644771822234753e-06, "loss": 0.7332, "step": 2517 }, { "epoch": 0.37, "grad_norm": 6.747981071472168, "learning_rate": 1.964435357239053e-06, "loss": 0.7825, "step": 2518 }, { "epoch": 0.37, "grad_norm": 6.600113868713379, "learning_rate": 1.96439350809227e-06, "loss": 0.7653, "step": 2519 }, { "epoch": 0.37, "grad_norm": 6.194567680358887, "learning_rate": 1.9643516347841745e-06, "loss": 0.8117, "step": 2520 }, { "epoch": 0.37, "grad_norm": 6.170608043670654, "learning_rate": 1.9643097373158163e-06, "loss": 0.7236, "step": 2521 }, { "epoch": 0.37, "grad_norm": 5.82302713394165, "learning_rate": 1.964267815688244e-06, "loss": 0.718, "step": 2522 }, { "epoch": 0.37, "grad_norm": 5.310296058654785, "learning_rate": 1.9642258699025084e-06, "loss": 0.6528, "step": 2523 }, { "epoch": 0.37, "grad_norm": 6.860446453094482, "learning_rate": 1.9641838999596603e-06, "loss": 0.825, "step": 2524 }, { "epoch": 0.37, "grad_norm": 5.664274215698242, "learning_rate": 1.9641419058607513e-06, "loss": 0.6845, "step": 2525 }, { "epoch": 0.37, "grad_norm": 6.893051624298096, "learning_rate": 1.964099887606833e-06, "loss": 0.7156, "step": 2526 }, { "epoch": 0.37, "grad_norm": 6.617159366607666, "learning_rate": 1.964057845198959e-06, "loss": 0.7889, "step": 2527 }, { "epoch": 0.37, "grad_norm": 5.731119632720947, "learning_rate": 1.964015778638182e-06, "loss": 0.7444, "step": 2528 }, { "epoch": 0.37, "grad_norm": 5.673489093780518, "learning_rate": 1.963973687925556e-06, "loss": 0.7279, "step": 2529 }, { "epoch": 0.37, "grad_norm": 5.889000415802002, "learning_rate": 1.963931573062136e-06, "loss": 0.7517, "step": 2530 }, { "epoch": 0.37, "grad_norm": 6.402366638183594, "learning_rate": 1.963889434048976e-06, "loss": 0.8016, "step": 2531 }, { "epoch": 0.37, "grad_norm": 6.64719295501709, "learning_rate": 1.9638472708871326e-06, "loss": 0.7493, "step": 2532 }, { "epoch": 0.37, "grad_norm": 6.496438503265381, "learning_rate": 1.9638050835776617e-06, "loss": 0.8446, "step": 2533 }, { "epoch": 0.37, "grad_norm": 6.537404537200928, "learning_rate": 1.963762872121621e-06, "loss": 0.7509, "step": 2534 }, { "epoch": 0.37, "grad_norm": 5.674141883850098, "learning_rate": 1.963720636520067e-06, "loss": 0.7451, "step": 2535 }, { "epoch": 0.37, "grad_norm": 6.619046211242676, "learning_rate": 1.9636783767740588e-06, "loss": 0.7557, "step": 2536 }, { "epoch": 0.37, "grad_norm": 5.631729602813721, "learning_rate": 1.963636092884654e-06, "loss": 0.8381, "step": 2537 }, { "epoch": 0.37, "grad_norm": 6.074173450469971, "learning_rate": 1.963593784852913e-06, "loss": 0.7668, "step": 2538 }, { "epoch": 0.37, "grad_norm": 6.170452117919922, "learning_rate": 1.9635514526798958e-06, "loss": 0.8251, "step": 2539 }, { "epoch": 0.37, "grad_norm": 6.184507846832275, "learning_rate": 1.9635090963666622e-06, "loss": 0.7459, "step": 2540 }, { "epoch": 0.37, "grad_norm": 5.908174514770508, "learning_rate": 1.9634667159142734e-06, "loss": 0.8195, "step": 2541 }, { "epoch": 0.37, "grad_norm": 5.640331268310547, "learning_rate": 1.9634243113237922e-06, "loss": 0.6459, "step": 2542 }, { "epoch": 0.37, "grad_norm": 5.643704891204834, "learning_rate": 1.9633818825962797e-06, "loss": 0.6943, "step": 2543 }, { "epoch": 0.37, "grad_norm": 6.681781768798828, "learning_rate": 1.9633394297327996e-06, "loss": 0.7663, "step": 2544 }, { "epoch": 0.37, "grad_norm": 6.706789493560791, "learning_rate": 1.9632969527344155e-06, "loss": 0.8948, "step": 2545 }, { "epoch": 0.37, "grad_norm": 6.15079402923584, "learning_rate": 1.9632544516021913e-06, "loss": 0.7816, "step": 2546 }, { "epoch": 0.37, "grad_norm": 5.609745025634766, "learning_rate": 1.963211926337192e-06, "loss": 0.6726, "step": 2547 }, { "epoch": 0.37, "grad_norm": 5.786197185516357, "learning_rate": 1.963169376940483e-06, "loss": 0.7545, "step": 2548 }, { "epoch": 0.37, "grad_norm": 5.335502624511719, "learning_rate": 1.96312680341313e-06, "loss": 0.7553, "step": 2549 }, { "epoch": 0.37, "grad_norm": 5.7337260246276855, "learning_rate": 1.9630842057562e-06, "loss": 0.6966, "step": 2550 }, { "epoch": 0.37, "grad_norm": 5.869070529937744, "learning_rate": 1.96304158397076e-06, "loss": 0.6234, "step": 2551 }, { "epoch": 0.37, "grad_norm": 5.613480091094971, "learning_rate": 1.962998938057878e-06, "loss": 0.7392, "step": 2552 }, { "epoch": 0.37, "grad_norm": 6.655649185180664, "learning_rate": 1.9629562680186223e-06, "loss": 0.7808, "step": 2553 }, { "epoch": 0.37, "grad_norm": 6.907693862915039, "learning_rate": 1.9629135738540624e-06, "loss": 0.7377, "step": 2554 }, { "epoch": 0.37, "grad_norm": 5.578165054321289, "learning_rate": 1.9628708555652667e-06, "loss": 0.8151, "step": 2555 }, { "epoch": 0.37, "grad_norm": 6.013669490814209, "learning_rate": 1.9628281131533065e-06, "loss": 0.7822, "step": 2556 }, { "epoch": 0.37, "grad_norm": 6.421931266784668, "learning_rate": 1.9627853466192527e-06, "loss": 0.7392, "step": 2557 }, { "epoch": 0.37, "grad_norm": 6.14030647277832, "learning_rate": 1.962742555964176e-06, "loss": 0.7507, "step": 2558 }, { "epoch": 0.37, "grad_norm": 5.842782020568848, "learning_rate": 1.962699741189149e-06, "loss": 0.7387, "step": 2559 }, { "epoch": 0.37, "grad_norm": 6.15852165222168, "learning_rate": 1.9626569022952444e-06, "loss": 0.6788, "step": 2560 }, { "epoch": 0.37, "grad_norm": 5.953622341156006, "learning_rate": 1.962614039283535e-06, "loss": 0.7874, "step": 2561 }, { "epoch": 0.37, "grad_norm": 5.605368614196777, "learning_rate": 1.9625711521550947e-06, "loss": 0.7056, "step": 2562 }, { "epoch": 0.37, "grad_norm": 5.452855110168457, "learning_rate": 1.9625282409109986e-06, "loss": 0.6182, "step": 2563 }, { "epoch": 0.37, "grad_norm": 5.511765003204346, "learning_rate": 1.962485305552321e-06, "loss": 0.7705, "step": 2564 }, { "epoch": 0.37, "grad_norm": 6.282729625701904, "learning_rate": 1.9624423460801383e-06, "loss": 0.7347, "step": 2565 }, { "epoch": 0.37, "grad_norm": 5.098060131072998, "learning_rate": 1.962399362495526e-06, "loss": 0.7361, "step": 2566 }, { "epoch": 0.37, "grad_norm": 6.371920108795166, "learning_rate": 1.9623563547995617e-06, "loss": 0.8401, "step": 2567 }, { "epoch": 0.37, "grad_norm": 5.708228588104248, "learning_rate": 1.962313322993323e-06, "loss": 0.7241, "step": 2568 }, { "epoch": 0.37, "grad_norm": 6.313755035400391, "learning_rate": 1.962270267077887e-06, "loss": 0.8307, "step": 2569 }, { "epoch": 0.37, "grad_norm": 6.4742431640625, "learning_rate": 1.962227187054333e-06, "loss": 0.7465, "step": 2570 }, { "epoch": 0.37, "grad_norm": 6.7021660804748535, "learning_rate": 1.9621840829237408e-06, "loss": 0.7468, "step": 2571 }, { "epoch": 0.37, "grad_norm": 6.574391841888428, "learning_rate": 1.9621409546871893e-06, "loss": 0.7561, "step": 2572 }, { "epoch": 0.37, "grad_norm": 5.777669429779053, "learning_rate": 1.9620978023457594e-06, "loss": 0.7726, "step": 2573 }, { "epoch": 0.37, "grad_norm": 6.254700660705566, "learning_rate": 1.9620546259005327e-06, "loss": 0.5861, "step": 2574 }, { "epoch": 0.37, "grad_norm": 6.346971035003662, "learning_rate": 1.9620114253525904e-06, "loss": 0.7194, "step": 2575 }, { "epoch": 0.37, "grad_norm": 5.890815258026123, "learning_rate": 1.9619682007030148e-06, "loss": 0.7131, "step": 2576 }, { "epoch": 0.37, "grad_norm": 7.077683448791504, "learning_rate": 1.961924951952889e-06, "loss": 0.6587, "step": 2577 }, { "epoch": 0.37, "grad_norm": 5.465980052947998, "learning_rate": 1.9618816791032965e-06, "loss": 0.6702, "step": 2578 }, { "epoch": 0.37, "grad_norm": 6.163966655731201, "learning_rate": 1.961838382155322e-06, "loss": 0.79, "step": 2579 }, { "epoch": 0.37, "grad_norm": 5.514883518218994, "learning_rate": 1.961795061110049e-06, "loss": 0.7764, "step": 2580 }, { "epoch": 0.37, "grad_norm": 6.219621658325195, "learning_rate": 1.9617517159685634e-06, "loss": 0.6933, "step": 2581 }, { "epoch": 0.37, "grad_norm": 6.442838191986084, "learning_rate": 1.961708346731952e-06, "loss": 0.726, "step": 2582 }, { "epoch": 0.37, "grad_norm": 6.417985439300537, "learning_rate": 1.9616649534012997e-06, "loss": 0.78, "step": 2583 }, { "epoch": 0.37, "grad_norm": 5.700937747955322, "learning_rate": 1.961621535977695e-06, "loss": 0.6784, "step": 2584 }, { "epoch": 0.38, "grad_norm": 6.263084888458252, "learning_rate": 1.961578094462225e-06, "loss": 0.7531, "step": 2585 }, { "epoch": 0.38, "grad_norm": 7.47458553314209, "learning_rate": 1.961534628855978e-06, "loss": 0.7776, "step": 2586 }, { "epoch": 0.38, "grad_norm": 6.094638824462891, "learning_rate": 1.961491139160044e-06, "loss": 0.8209, "step": 2587 }, { "epoch": 0.38, "grad_norm": 5.962462902069092, "learning_rate": 1.961447625375511e-06, "loss": 0.7186, "step": 2588 }, { "epoch": 0.38, "grad_norm": 6.176087856292725, "learning_rate": 1.96140408750347e-06, "loss": 0.7826, "step": 2589 }, { "epoch": 0.38, "grad_norm": 6.19719123840332, "learning_rate": 1.961360525545012e-06, "loss": 0.729, "step": 2590 }, { "epoch": 0.38, "grad_norm": 5.509442329406738, "learning_rate": 1.9613169395012283e-06, "loss": 0.6853, "step": 2591 }, { "epoch": 0.38, "grad_norm": 5.9892802238464355, "learning_rate": 1.9612733293732102e-06, "loss": 0.7734, "step": 2592 }, { "epoch": 0.38, "grad_norm": 5.813855171203613, "learning_rate": 1.961229695162051e-06, "loss": 0.7433, "step": 2593 }, { "epoch": 0.38, "grad_norm": 6.25537109375, "learning_rate": 1.9611860368688437e-06, "loss": 0.7581, "step": 2594 }, { "epoch": 0.38, "grad_norm": 5.628176689147949, "learning_rate": 1.9611423544946815e-06, "loss": 0.7378, "step": 2595 }, { "epoch": 0.38, "grad_norm": 5.846281051635742, "learning_rate": 1.96109864804066e-06, "loss": 0.6732, "step": 2596 }, { "epoch": 0.38, "grad_norm": 5.7323408126831055, "learning_rate": 1.961054917507873e-06, "loss": 0.7146, "step": 2597 }, { "epoch": 0.38, "grad_norm": 6.151235580444336, "learning_rate": 1.961011162897417e-06, "loss": 0.811, "step": 2598 }, { "epoch": 0.38, "grad_norm": 6.020203113555908, "learning_rate": 1.960967384210387e-06, "loss": 0.7677, "step": 2599 }, { "epoch": 0.38, "grad_norm": 5.587785720825195, "learning_rate": 1.9609235814478814e-06, "loss": 0.7185, "step": 2600 }, { "epoch": 0.38, "grad_norm": 6.948749542236328, "learning_rate": 1.9608797546109964e-06, "loss": 0.7183, "step": 2601 }, { "epoch": 0.38, "grad_norm": 5.673445224761963, "learning_rate": 1.9608359037008302e-06, "loss": 0.7397, "step": 2602 }, { "epoch": 0.38, "grad_norm": 6.021262168884277, "learning_rate": 1.9607920287184824e-06, "loss": 0.7197, "step": 2603 }, { "epoch": 0.38, "grad_norm": 5.906905651092529, "learning_rate": 1.960748129665051e-06, "loss": 0.7354, "step": 2604 }, { "epoch": 0.38, "grad_norm": 6.302245140075684, "learning_rate": 1.960704206541636e-06, "loss": 0.8278, "step": 2605 }, { "epoch": 0.38, "grad_norm": 6.435387134552002, "learning_rate": 1.9606602593493385e-06, "loss": 0.6857, "step": 2606 }, { "epoch": 0.38, "grad_norm": 5.636057376861572, "learning_rate": 1.9606162880892588e-06, "loss": 0.7753, "step": 2607 }, { "epoch": 0.38, "grad_norm": 6.270735740661621, "learning_rate": 1.9605722927624993e-06, "loss": 0.7092, "step": 2608 }, { "epoch": 0.38, "grad_norm": 5.891835689544678, "learning_rate": 1.9605282733701615e-06, "loss": 0.7408, "step": 2609 }, { "epoch": 0.38, "grad_norm": 6.742282390594482, "learning_rate": 1.9604842299133484e-06, "loss": 0.8855, "step": 2610 }, { "epoch": 0.38, "grad_norm": 5.901494026184082, "learning_rate": 1.9604401623931636e-06, "loss": 0.6823, "step": 2611 }, { "epoch": 0.38, "grad_norm": 5.473692417144775, "learning_rate": 1.960396070810711e-06, "loss": 0.7529, "step": 2612 }, { "epoch": 0.38, "grad_norm": 6.602785587310791, "learning_rate": 1.9603519551670957e-06, "loss": 0.7932, "step": 2613 }, { "epoch": 0.38, "grad_norm": 6.179959297180176, "learning_rate": 1.960307815463422e-06, "loss": 0.7052, "step": 2614 }, { "epoch": 0.38, "grad_norm": 6.052942752838135, "learning_rate": 1.9602636517007966e-06, "loss": 0.7827, "step": 2615 }, { "epoch": 0.38, "grad_norm": 6.050891876220703, "learning_rate": 1.960219463880326e-06, "loss": 0.7122, "step": 2616 }, { "epoch": 0.38, "grad_norm": 6.193050861358643, "learning_rate": 1.9601752520031166e-06, "loss": 0.7212, "step": 2617 }, { "epoch": 0.38, "grad_norm": 5.98225212097168, "learning_rate": 1.960131016070276e-06, "loss": 0.7943, "step": 2618 }, { "epoch": 0.38, "grad_norm": 6.497100830078125, "learning_rate": 1.9600867560829134e-06, "loss": 0.7553, "step": 2619 }, { "epoch": 0.38, "grad_norm": 6.302478313446045, "learning_rate": 1.9600424720421367e-06, "loss": 0.756, "step": 2620 }, { "epoch": 0.38, "grad_norm": 6.544468879699707, "learning_rate": 1.959998163949056e-06, "loss": 0.7356, "step": 2621 }, { "epoch": 0.38, "grad_norm": 5.342223644256592, "learning_rate": 1.9599538318047815e-06, "loss": 0.7058, "step": 2622 }, { "epoch": 0.38, "grad_norm": 5.473176002502441, "learning_rate": 1.959909475610423e-06, "loss": 0.7515, "step": 2623 }, { "epoch": 0.38, "grad_norm": 6.052495956420898, "learning_rate": 1.9598650953670924e-06, "loss": 0.7752, "step": 2624 }, { "epoch": 0.38, "grad_norm": 6.066133499145508, "learning_rate": 1.9598206910759014e-06, "loss": 0.7563, "step": 2625 }, { "epoch": 0.38, "grad_norm": 6.315281391143799, "learning_rate": 1.959776262737963e-06, "loss": 0.6949, "step": 2626 }, { "epoch": 0.38, "grad_norm": 5.968869686126709, "learning_rate": 1.9597318103543893e-06, "loss": 0.7314, "step": 2627 }, { "epoch": 0.38, "grad_norm": 5.685793876647949, "learning_rate": 1.9596873339262946e-06, "loss": 0.6793, "step": 2628 }, { "epoch": 0.38, "grad_norm": 5.510195732116699, "learning_rate": 1.959642833454793e-06, "loss": 0.6947, "step": 2629 }, { "epoch": 0.38, "grad_norm": 5.955976486206055, "learning_rate": 1.9595983089409994e-06, "loss": 0.7396, "step": 2630 }, { "epoch": 0.38, "grad_norm": 6.961467266082764, "learning_rate": 1.9595537603860296e-06, "loss": 0.7969, "step": 2631 }, { "epoch": 0.38, "grad_norm": 6.678676128387451, "learning_rate": 1.959509187790999e-06, "loss": 0.7767, "step": 2632 }, { "epoch": 0.38, "grad_norm": 5.856191635131836, "learning_rate": 1.9594645911570255e-06, "loss": 0.744, "step": 2633 }, { "epoch": 0.38, "grad_norm": 5.7793049812316895, "learning_rate": 1.959419970485225e-06, "loss": 0.7878, "step": 2634 }, { "epoch": 0.38, "grad_norm": 5.857531547546387, "learning_rate": 1.959375325776716e-06, "loss": 0.6828, "step": 2635 }, { "epoch": 0.38, "grad_norm": 5.694693088531494, "learning_rate": 1.9593306570326177e-06, "loss": 0.6533, "step": 2636 }, { "epoch": 0.38, "grad_norm": 5.748971939086914, "learning_rate": 1.959285964254048e-06, "loss": 0.7209, "step": 2637 }, { "epoch": 0.38, "grad_norm": 6.082775115966797, "learning_rate": 1.9592412474421275e-06, "loss": 0.7627, "step": 2638 }, { "epoch": 0.38, "grad_norm": 6.407487392425537, "learning_rate": 1.9591965065979757e-06, "loss": 0.6682, "step": 2639 }, { "epoch": 0.38, "grad_norm": 5.567742347717285, "learning_rate": 1.9591517417227143e-06, "loss": 0.7393, "step": 2640 }, { "epoch": 0.38, "grad_norm": 6.304567337036133, "learning_rate": 1.959106952817464e-06, "loss": 0.662, "step": 2641 }, { "epoch": 0.38, "grad_norm": 6.635506629943848, "learning_rate": 1.9590621398833482e-06, "loss": 0.6396, "step": 2642 }, { "epoch": 0.38, "grad_norm": 5.988630771636963, "learning_rate": 1.959017302921488e-06, "loss": 0.7662, "step": 2643 }, { "epoch": 0.38, "grad_norm": 5.372693061828613, "learning_rate": 1.958972441933008e-06, "loss": 0.707, "step": 2644 }, { "epoch": 0.38, "grad_norm": 6.204242706298828, "learning_rate": 1.958927556919031e-06, "loss": 0.6845, "step": 2645 }, { "epoch": 0.38, "grad_norm": 5.64480447769165, "learning_rate": 1.958882647880683e-06, "loss": 0.6764, "step": 2646 }, { "epoch": 0.38, "grad_norm": 5.412115573883057, "learning_rate": 1.958837714819088e-06, "loss": 0.6937, "step": 2647 }, { "epoch": 0.38, "grad_norm": 6.495943069458008, "learning_rate": 1.958792757735372e-06, "loss": 0.8018, "step": 2648 }, { "epoch": 0.38, "grad_norm": 6.150335788726807, "learning_rate": 1.958747776630661e-06, "loss": 0.7723, "step": 2649 }, { "epoch": 0.38, "grad_norm": 6.141735553741455, "learning_rate": 1.9587027715060827e-06, "loss": 0.7623, "step": 2650 }, { "epoch": 0.38, "grad_norm": 7.0084686279296875, "learning_rate": 1.9586577423627637e-06, "loss": 0.7346, "step": 2651 }, { "epoch": 0.38, "grad_norm": 6.42064094543457, "learning_rate": 1.958612689201833e-06, "loss": 0.8282, "step": 2652 }, { "epoch": 0.38, "grad_norm": 6.154300212860107, "learning_rate": 1.958567612024419e-06, "loss": 0.7177, "step": 2653 }, { "epoch": 0.39, "grad_norm": 5.9831037521362305, "learning_rate": 1.958522510831651e-06, "loss": 0.6997, "step": 2654 }, { "epoch": 0.39, "grad_norm": 5.779745578765869, "learning_rate": 1.9584773856246584e-06, "loss": 0.7733, "step": 2655 }, { "epoch": 0.39, "grad_norm": 6.69302225112915, "learning_rate": 1.958432236404573e-06, "loss": 0.7647, "step": 2656 }, { "epoch": 0.39, "grad_norm": 6.139658451080322, "learning_rate": 1.9583870631725245e-06, "loss": 0.7079, "step": 2657 }, { "epoch": 0.39, "grad_norm": 6.179642677307129, "learning_rate": 1.958341865929646e-06, "loss": 0.6619, "step": 2658 }, { "epoch": 0.39, "grad_norm": 5.863410949707031, "learning_rate": 1.9582966446770686e-06, "loss": 0.7576, "step": 2659 }, { "epoch": 0.39, "grad_norm": 6.306168079376221, "learning_rate": 1.958251399415926e-06, "loss": 0.8291, "step": 2660 }, { "epoch": 0.39, "grad_norm": 6.76054573059082, "learning_rate": 1.9582061301473517e-06, "loss": 0.7747, "step": 2661 }, { "epoch": 0.39, "grad_norm": 5.420276165008545, "learning_rate": 1.95816083687248e-06, "loss": 0.6821, "step": 2662 }, { "epoch": 0.39, "grad_norm": 6.626924514770508, "learning_rate": 1.958115519592445e-06, "loss": 0.6689, "step": 2663 }, { "epoch": 0.39, "grad_norm": 5.800046443939209, "learning_rate": 1.9580701783083822e-06, "loss": 0.778, "step": 2664 }, { "epoch": 0.39, "grad_norm": 5.76637601852417, "learning_rate": 1.9580248130214285e-06, "loss": 0.7609, "step": 2665 }, { "epoch": 0.39, "grad_norm": 6.234334468841553, "learning_rate": 1.9579794237327192e-06, "loss": 0.6794, "step": 2666 }, { "epoch": 0.39, "grad_norm": 6.397754669189453, "learning_rate": 1.957934010443392e-06, "loss": 0.7928, "step": 2667 }, { "epoch": 0.39, "grad_norm": 6.501621246337891, "learning_rate": 1.957888573154585e-06, "loss": 0.7734, "step": 2668 }, { "epoch": 0.39, "grad_norm": 5.783768177032471, "learning_rate": 1.957843111867436e-06, "loss": 0.6931, "step": 2669 }, { "epoch": 0.39, "grad_norm": 5.602766990661621, "learning_rate": 1.957797626583084e-06, "loss": 0.6593, "step": 2670 }, { "epoch": 0.39, "grad_norm": 6.227962493896484, "learning_rate": 1.957752117302669e-06, "loss": 0.7375, "step": 2671 }, { "epoch": 0.39, "grad_norm": 5.8679938316345215, "learning_rate": 1.957706584027331e-06, "loss": 0.6863, "step": 2672 }, { "epoch": 0.39, "grad_norm": 5.794355869293213, "learning_rate": 1.9576610267582103e-06, "loss": 0.7623, "step": 2673 }, { "epoch": 0.39, "grad_norm": 5.91319465637207, "learning_rate": 1.9576154454964487e-06, "loss": 0.8607, "step": 2674 }, { "epoch": 0.39, "grad_norm": 5.736642360687256, "learning_rate": 1.9575698402431882e-06, "loss": 0.6624, "step": 2675 }, { "epoch": 0.39, "grad_norm": 7.199914455413818, "learning_rate": 1.9575242109995707e-06, "loss": 0.7588, "step": 2676 }, { "epoch": 0.39, "grad_norm": 6.5688395500183105, "learning_rate": 1.957478557766741e-06, "loss": 0.7964, "step": 2677 }, { "epoch": 0.39, "grad_norm": 6.641444206237793, "learning_rate": 1.9574328805458407e-06, "loss": 0.7981, "step": 2678 }, { "epoch": 0.39, "grad_norm": 5.9028167724609375, "learning_rate": 1.9573871793380156e-06, "loss": 0.7072, "step": 2679 }, { "epoch": 0.39, "grad_norm": 5.398960590362549, "learning_rate": 1.9573414541444103e-06, "loss": 0.6974, "step": 2680 }, { "epoch": 0.39, "grad_norm": 7.092748641967773, "learning_rate": 1.9572957049661706e-06, "loss": 0.7522, "step": 2681 }, { "epoch": 0.39, "grad_norm": 6.1698994636535645, "learning_rate": 1.9572499318044425e-06, "loss": 0.7813, "step": 2682 }, { "epoch": 0.39, "grad_norm": 6.137246608734131, "learning_rate": 1.9572041346603723e-06, "loss": 0.7235, "step": 2683 }, { "epoch": 0.39, "grad_norm": 5.7297539710998535, "learning_rate": 1.9571583135351084e-06, "loss": 0.7573, "step": 2684 }, { "epoch": 0.39, "grad_norm": 5.877299785614014, "learning_rate": 1.9571124684297974e-06, "loss": 0.7589, "step": 2685 }, { "epoch": 0.39, "grad_norm": 6.8468451499938965, "learning_rate": 1.9570665993455894e-06, "loss": 0.729, "step": 2686 }, { "epoch": 0.39, "grad_norm": 5.951207637786865, "learning_rate": 1.9570207062836324e-06, "loss": 0.7327, "step": 2687 }, { "epoch": 0.39, "grad_norm": 5.994930744171143, "learning_rate": 1.9569747892450767e-06, "loss": 0.7393, "step": 2688 }, { "epoch": 0.39, "grad_norm": 5.946285247802734, "learning_rate": 1.9569288482310726e-06, "loss": 0.6944, "step": 2689 }, { "epoch": 0.39, "grad_norm": 5.744028091430664, "learning_rate": 1.9568828832427708e-06, "loss": 0.6998, "step": 2690 }, { "epoch": 0.39, "grad_norm": 5.796054840087891, "learning_rate": 1.956836894281323e-06, "loss": 0.6767, "step": 2691 }, { "epoch": 0.39, "grad_norm": 6.183208465576172, "learning_rate": 1.9567908813478823e-06, "loss": 0.6966, "step": 2692 }, { "epoch": 0.39, "grad_norm": 6.708256244659424, "learning_rate": 1.9567448444436e-06, "loss": 0.7606, "step": 2693 }, { "epoch": 0.39, "grad_norm": 6.977267265319824, "learning_rate": 1.9566987835696307e-06, "loss": 0.7411, "step": 2694 }, { "epoch": 0.39, "grad_norm": 6.123320579528809, "learning_rate": 1.9566526987271274e-06, "loss": 0.7343, "step": 2695 }, { "epoch": 0.39, "grad_norm": 6.0400261878967285, "learning_rate": 1.9566065899172453e-06, "loss": 0.7161, "step": 2696 }, { "epoch": 0.39, "grad_norm": 6.261206150054932, "learning_rate": 1.9565604571411395e-06, "loss": 0.6849, "step": 2697 }, { "epoch": 0.39, "grad_norm": 6.323975563049316, "learning_rate": 1.9565143003999656e-06, "loss": 0.748, "step": 2698 }, { "epoch": 0.39, "grad_norm": 6.213433265686035, "learning_rate": 1.95646811969488e-06, "loss": 0.7321, "step": 2699 }, { "epoch": 0.39, "grad_norm": 5.897982597351074, "learning_rate": 1.95642191502704e-06, "loss": 0.6582, "step": 2700 }, { "epoch": 0.39, "grad_norm": 5.634274005889893, "learning_rate": 1.9563756863976025e-06, "loss": 0.6943, "step": 2701 }, { "epoch": 0.39, "grad_norm": 5.430141925811768, "learning_rate": 1.956329433807726e-06, "loss": 0.6733, "step": 2702 }, { "epoch": 0.39, "grad_norm": 5.743752956390381, "learning_rate": 1.9562831572585704e-06, "loss": 0.7919, "step": 2703 }, { "epoch": 0.39, "grad_norm": 6.266600131988525, "learning_rate": 1.9562368567512935e-06, "loss": 0.7552, "step": 2704 }, { "epoch": 0.39, "grad_norm": 5.99312686920166, "learning_rate": 1.9561905322870556e-06, "loss": 0.7922, "step": 2705 }, { "epoch": 0.39, "grad_norm": 5.074469089508057, "learning_rate": 1.956144183867018e-06, "loss": 0.7064, "step": 2706 }, { "epoch": 0.39, "grad_norm": 5.689623832702637, "learning_rate": 1.9560978114923413e-06, "loss": 0.6724, "step": 2707 }, { "epoch": 0.39, "grad_norm": 6.15392541885376, "learning_rate": 1.9560514151641874e-06, "loss": 0.6953, "step": 2708 }, { "epoch": 0.39, "grad_norm": 6.551229000091553, "learning_rate": 1.9560049948837185e-06, "loss": 0.7996, "step": 2709 }, { "epoch": 0.39, "grad_norm": 5.5862579345703125, "learning_rate": 1.9559585506520982e-06, "loss": 0.7026, "step": 2710 }, { "epoch": 0.39, "grad_norm": 5.62787389755249, "learning_rate": 1.9559120824704897e-06, "loss": 0.7234, "step": 2711 }, { "epoch": 0.39, "grad_norm": 5.901449680328369, "learning_rate": 1.9558655903400567e-06, "loss": 0.741, "step": 2712 }, { "epoch": 0.39, "grad_norm": 6.372224807739258, "learning_rate": 1.9558190742619648e-06, "loss": 0.7532, "step": 2713 }, { "epoch": 0.39, "grad_norm": 5.618725299835205, "learning_rate": 1.955772534237379e-06, "loss": 0.7458, "step": 2714 }, { "epoch": 0.39, "grad_norm": 6.033373832702637, "learning_rate": 1.955725970267465e-06, "loss": 0.7402, "step": 2715 }, { "epoch": 0.39, "grad_norm": 6.0707855224609375, "learning_rate": 1.9556793823533903e-06, "loss": 0.7478, "step": 2716 }, { "epoch": 0.39, "grad_norm": 6.185760974884033, "learning_rate": 1.955632770496321e-06, "loss": 0.8797, "step": 2717 }, { "epoch": 0.39, "grad_norm": 7.250920295715332, "learning_rate": 1.9555861346974256e-06, "loss": 0.8753, "step": 2718 }, { "epoch": 0.39, "grad_norm": 6.7061944007873535, "learning_rate": 1.9555394749578723e-06, "loss": 0.7538, "step": 2719 }, { "epoch": 0.39, "grad_norm": 6.138952255249023, "learning_rate": 1.95549279127883e-06, "loss": 0.8211, "step": 2720 }, { "epoch": 0.39, "grad_norm": 6.470191478729248, "learning_rate": 1.9554460836614683e-06, "loss": 0.7326, "step": 2721 }, { "epoch": 0.39, "grad_norm": 6.393682479858398, "learning_rate": 1.955399352106957e-06, "loss": 0.7061, "step": 2722 }, { "epoch": 0.4, "grad_norm": 5.29652738571167, "learning_rate": 1.955352596616468e-06, "loss": 0.6747, "step": 2723 }, { "epoch": 0.4, "grad_norm": 5.647540092468262, "learning_rate": 1.9553058171911715e-06, "loss": 0.726, "step": 2724 }, { "epoch": 0.4, "grad_norm": 5.759944915771484, "learning_rate": 1.95525901383224e-06, "loss": 0.7494, "step": 2725 }, { "epoch": 0.4, "grad_norm": 6.355412483215332, "learning_rate": 1.955212186540846e-06, "loss": 0.6929, "step": 2726 }, { "epoch": 0.4, "grad_norm": 5.90912389755249, "learning_rate": 1.955165335318163e-06, "loss": 0.7891, "step": 2727 }, { "epoch": 0.4, "grad_norm": 6.309475898742676, "learning_rate": 1.9551184601653647e-06, "loss": 0.7588, "step": 2728 }, { "epoch": 0.4, "grad_norm": 5.663712024688721, "learning_rate": 1.9550715610836252e-06, "loss": 0.6235, "step": 2729 }, { "epoch": 0.4, "grad_norm": 6.977806568145752, "learning_rate": 1.955024638074119e-06, "loss": 0.8662, "step": 2730 }, { "epoch": 0.4, "grad_norm": 6.078738212585449, "learning_rate": 1.9549776911380227e-06, "loss": 0.812, "step": 2731 }, { "epoch": 0.4, "grad_norm": 6.811643600463867, "learning_rate": 1.9549307202765115e-06, "loss": 0.7458, "step": 2732 }, { "epoch": 0.4, "grad_norm": 6.078209400177002, "learning_rate": 1.9548837254907635e-06, "loss": 0.7918, "step": 2733 }, { "epoch": 0.4, "grad_norm": 6.543373107910156, "learning_rate": 1.954836706781955e-06, "loss": 0.7146, "step": 2734 }, { "epoch": 0.4, "grad_norm": 5.655135154724121, "learning_rate": 1.954789664151264e-06, "loss": 0.6772, "step": 2735 }, { "epoch": 0.4, "grad_norm": 5.768637657165527, "learning_rate": 1.9547425975998695e-06, "loss": 0.7567, "step": 2736 }, { "epoch": 0.4, "grad_norm": 5.468042373657227, "learning_rate": 1.95469550712895e-06, "loss": 0.739, "step": 2737 }, { "epoch": 0.4, "grad_norm": 7.3113908767700195, "learning_rate": 1.9546483927396864e-06, "loss": 0.709, "step": 2738 }, { "epoch": 0.4, "grad_norm": 6.657531261444092, "learning_rate": 1.9546012544332586e-06, "loss": 0.7775, "step": 2739 }, { "epoch": 0.4, "grad_norm": 6.385902404785156, "learning_rate": 1.954554092210847e-06, "loss": 0.7481, "step": 2740 }, { "epoch": 0.4, "grad_norm": 5.259888172149658, "learning_rate": 1.954506906073634e-06, "loss": 0.7408, "step": 2741 }, { "epoch": 0.4, "grad_norm": 6.302123069763184, "learning_rate": 1.954459696022801e-06, "loss": 0.725, "step": 2742 }, { "epoch": 0.4, "grad_norm": 6.080132007598877, "learning_rate": 1.9544124620595316e-06, "loss": 0.6991, "step": 2743 }, { "epoch": 0.4, "grad_norm": 5.591606140136719, "learning_rate": 1.9543652041850084e-06, "loss": 0.7499, "step": 2744 }, { "epoch": 0.4, "grad_norm": 5.773857593536377, "learning_rate": 1.9543179224004154e-06, "loss": 0.8402, "step": 2745 }, { "epoch": 0.4, "grad_norm": 5.386804580688477, "learning_rate": 1.954270616706938e-06, "loss": 0.7106, "step": 2746 }, { "epoch": 0.4, "grad_norm": 5.990421772003174, "learning_rate": 1.9542232871057607e-06, "loss": 0.7906, "step": 2747 }, { "epoch": 0.4, "grad_norm": 5.9938788414001465, "learning_rate": 1.9541759335980693e-06, "loss": 0.7444, "step": 2748 }, { "epoch": 0.4, "grad_norm": 6.587980270385742, "learning_rate": 1.9541285561850503e-06, "loss": 0.7297, "step": 2749 }, { "epoch": 0.4, "grad_norm": 6.068031311035156, "learning_rate": 1.9540811548678904e-06, "loss": 0.732, "step": 2750 }, { "epoch": 0.4, "grad_norm": 5.904868125915527, "learning_rate": 1.9540337296477775e-06, "loss": 0.7244, "step": 2751 }, { "epoch": 0.4, "grad_norm": 6.094337463378906, "learning_rate": 1.9539862805259e-06, "loss": 0.6993, "step": 2752 }, { "epoch": 0.4, "grad_norm": 6.857419490814209, "learning_rate": 1.9539388075034458e-06, "loss": 0.7573, "step": 2753 }, { "epoch": 0.4, "grad_norm": 6.273640155792236, "learning_rate": 1.9538913105816047e-06, "loss": 0.7153, "step": 2754 }, { "epoch": 0.4, "grad_norm": 6.2903733253479, "learning_rate": 1.953843789761567e-06, "loss": 0.6785, "step": 2755 }, { "epoch": 0.4, "grad_norm": 6.0703325271606445, "learning_rate": 1.9537962450445235e-06, "loss": 0.7147, "step": 2756 }, { "epoch": 0.4, "grad_norm": 6.140186309814453, "learning_rate": 1.953748676431664e-06, "loss": 0.8095, "step": 2757 }, { "epoch": 0.4, "grad_norm": 6.0136942863464355, "learning_rate": 1.9537010839241814e-06, "loss": 0.7378, "step": 2758 }, { "epoch": 0.4, "grad_norm": 6.257749557495117, "learning_rate": 1.9536534675232676e-06, "loss": 0.8424, "step": 2759 }, { "epoch": 0.4, "grad_norm": 5.495415210723877, "learning_rate": 1.953605827230116e-06, "loss": 0.6985, "step": 2760 }, { "epoch": 0.4, "grad_norm": 5.635964393615723, "learning_rate": 1.9535581630459195e-06, "loss": 0.7213, "step": 2761 }, { "epoch": 0.4, "grad_norm": 6.35843563079834, "learning_rate": 1.953510474971873e-06, "loss": 0.8575, "step": 2762 }, { "epoch": 0.4, "grad_norm": 5.882627010345459, "learning_rate": 1.9534627630091705e-06, "loss": 0.7817, "step": 2763 }, { "epoch": 0.4, "grad_norm": 6.234212398529053, "learning_rate": 1.9534150271590078e-06, "loss": 0.6915, "step": 2764 }, { "epoch": 0.4, "grad_norm": 5.913034915924072, "learning_rate": 1.953367267422581e-06, "loss": 0.7373, "step": 2765 }, { "epoch": 0.4, "grad_norm": 5.284576416015625, "learning_rate": 1.9533194838010857e-06, "loss": 0.7392, "step": 2766 }, { "epoch": 0.4, "grad_norm": 6.2427077293396, "learning_rate": 1.95327167629572e-06, "loss": 0.7828, "step": 2767 }, { "epoch": 0.4, "grad_norm": 6.127495765686035, "learning_rate": 1.953223844907682e-06, "loss": 0.7455, "step": 2768 }, { "epoch": 0.4, "grad_norm": 5.956553936004639, "learning_rate": 1.953175989638169e-06, "loss": 0.7292, "step": 2769 }, { "epoch": 0.4, "grad_norm": 5.643560409545898, "learning_rate": 1.95312811048838e-06, "loss": 0.7123, "step": 2770 }, { "epoch": 0.4, "grad_norm": 6.532345771789551, "learning_rate": 1.953080207459515e-06, "loss": 0.8013, "step": 2771 }, { "epoch": 0.4, "grad_norm": 7.65672492980957, "learning_rate": 1.953032280552774e-06, "loss": 0.9043, "step": 2772 }, { "epoch": 0.4, "grad_norm": 6.145915985107422, "learning_rate": 1.952984329769358e-06, "loss": 0.7908, "step": 2773 }, { "epoch": 0.4, "grad_norm": 5.959425926208496, "learning_rate": 1.9529363551104677e-06, "loss": 0.7888, "step": 2774 }, { "epoch": 0.4, "grad_norm": 6.011079788208008, "learning_rate": 1.9528883565773056e-06, "loss": 0.7602, "step": 2775 }, { "epoch": 0.4, "grad_norm": 5.887458324432373, "learning_rate": 1.952840334171074e-06, "loss": 0.7322, "step": 2776 }, { "epoch": 0.4, "grad_norm": 6.181153297424316, "learning_rate": 1.952792287892976e-06, "loss": 0.8049, "step": 2777 }, { "epoch": 0.4, "grad_norm": 6.545906066894531, "learning_rate": 1.952744217744215e-06, "loss": 0.7597, "step": 2778 }, { "epoch": 0.4, "grad_norm": 6.204325199127197, "learning_rate": 1.952696123725996e-06, "loss": 0.8143, "step": 2779 }, { "epoch": 0.4, "grad_norm": 6.088281154632568, "learning_rate": 1.9526480058395237e-06, "loss": 0.7365, "step": 2780 }, { "epoch": 0.4, "grad_norm": 5.354375839233398, "learning_rate": 1.952599864086003e-06, "loss": 0.6686, "step": 2781 }, { "epoch": 0.4, "grad_norm": 5.408016681671143, "learning_rate": 1.952551698466641e-06, "loss": 0.6972, "step": 2782 }, { "epoch": 0.4, "grad_norm": 5.915835857391357, "learning_rate": 1.952503508982644e-06, "loss": 0.7343, "step": 2783 }, { "epoch": 0.4, "grad_norm": 6.177922248840332, "learning_rate": 1.952455295635219e-06, "loss": 0.7642, "step": 2784 }, { "epoch": 0.4, "grad_norm": 6.301286220550537, "learning_rate": 1.9524070584255745e-06, "loss": 0.8452, "step": 2785 }, { "epoch": 0.4, "grad_norm": 5.659956932067871, "learning_rate": 1.9523587973549183e-06, "loss": 0.7086, "step": 2786 }, { "epoch": 0.4, "grad_norm": 5.996121406555176, "learning_rate": 1.95231051242446e-06, "loss": 0.7299, "step": 2787 }, { "epoch": 0.4, "grad_norm": 5.7547078132629395, "learning_rate": 1.9522622036354087e-06, "loss": 0.7854, "step": 2788 }, { "epoch": 0.4, "grad_norm": 5.252204895019531, "learning_rate": 1.9522138709889756e-06, "loss": 0.6843, "step": 2789 }, { "epoch": 0.4, "grad_norm": 5.800207614898682, "learning_rate": 1.952165514486371e-06, "loss": 0.717, "step": 2790 }, { "epoch": 0.4, "grad_norm": 7.37682580947876, "learning_rate": 1.9521171341288066e-06, "loss": 0.8043, "step": 2791 }, { "epoch": 0.41, "grad_norm": 6.2206315994262695, "learning_rate": 1.9520687299174945e-06, "loss": 0.7466, "step": 2792 }, { "epoch": 0.41, "grad_norm": 5.888787746429443, "learning_rate": 1.9520203018536474e-06, "loss": 0.742, "step": 2793 }, { "epoch": 0.41, "grad_norm": 6.1869635581970215, "learning_rate": 1.951971849938478e-06, "loss": 0.7432, "step": 2794 }, { "epoch": 0.41, "grad_norm": 6.406360626220703, "learning_rate": 1.951923374173201e-06, "loss": 0.8458, "step": 2795 }, { "epoch": 0.41, "grad_norm": 6.035416126251221, "learning_rate": 1.9518748745590308e-06, "loss": 0.7417, "step": 2796 }, { "epoch": 0.41, "grad_norm": 6.223974227905273, "learning_rate": 1.951826351097182e-06, "loss": 0.7245, "step": 2797 }, { "epoch": 0.41, "grad_norm": 5.884003639221191, "learning_rate": 1.9517778037888707e-06, "loss": 0.7504, "step": 2798 }, { "epoch": 0.41, "grad_norm": 5.749340534210205, "learning_rate": 1.951729232635313e-06, "loss": 0.6955, "step": 2799 }, { "epoch": 0.41, "grad_norm": 5.805780410766602, "learning_rate": 1.951680637637726e-06, "loss": 0.7004, "step": 2800 }, { "epoch": 0.41, "grad_norm": 6.4624505043029785, "learning_rate": 1.9516320187973265e-06, "loss": 0.788, "step": 2801 }, { "epoch": 0.41, "grad_norm": 5.810768127441406, "learning_rate": 1.9515833761153328e-06, "loss": 0.7005, "step": 2802 }, { "epoch": 0.41, "grad_norm": 5.924090385437012, "learning_rate": 1.951534709592964e-06, "loss": 0.7472, "step": 2803 }, { "epoch": 0.41, "grad_norm": 6.244199275970459, "learning_rate": 1.951486019231439e-06, "loss": 0.761, "step": 2804 }, { "epoch": 0.41, "grad_norm": 5.709132194519043, "learning_rate": 1.9514373050319782e-06, "loss": 0.7443, "step": 2805 }, { "epoch": 0.41, "grad_norm": 6.408870697021484, "learning_rate": 1.9513885669958015e-06, "loss": 0.7408, "step": 2806 }, { "epoch": 0.41, "grad_norm": 5.6360321044921875, "learning_rate": 1.9513398051241303e-06, "loss": 0.7695, "step": 2807 }, { "epoch": 0.41, "grad_norm": 6.126442909240723, "learning_rate": 1.9512910194181857e-06, "loss": 0.7458, "step": 2808 }, { "epoch": 0.41, "grad_norm": 5.855363845825195, "learning_rate": 1.9512422098791907e-06, "loss": 0.6729, "step": 2809 }, { "epoch": 0.41, "grad_norm": 6.534947395324707, "learning_rate": 1.951193376508367e-06, "loss": 0.757, "step": 2810 }, { "epoch": 0.41, "grad_norm": 5.935756206512451, "learning_rate": 1.9511445193069394e-06, "loss": 0.7494, "step": 2811 }, { "epoch": 0.41, "grad_norm": 5.970674991607666, "learning_rate": 1.951095638276131e-06, "loss": 0.6999, "step": 2812 }, { "epoch": 0.41, "grad_norm": 6.19324254989624, "learning_rate": 1.951046733417167e-06, "loss": 0.7427, "step": 2813 }, { "epoch": 0.41, "grad_norm": 5.773469924926758, "learning_rate": 1.9509978047312725e-06, "loss": 0.7649, "step": 2814 }, { "epoch": 0.41, "grad_norm": 5.774258613586426, "learning_rate": 1.950948852219673e-06, "loss": 0.7966, "step": 2815 }, { "epoch": 0.41, "grad_norm": 5.2600297927856445, "learning_rate": 1.950899875883595e-06, "loss": 0.7729, "step": 2816 }, { "epoch": 0.41, "grad_norm": 5.488821029663086, "learning_rate": 1.9508508757242657e-06, "loss": 0.685, "step": 2817 }, { "epoch": 0.41, "grad_norm": 6.098687171936035, "learning_rate": 1.9508018517429128e-06, "loss": 0.7339, "step": 2818 }, { "epoch": 0.41, "grad_norm": 5.942678928375244, "learning_rate": 1.950752803940764e-06, "loss": 0.7202, "step": 2819 }, { "epoch": 0.41, "grad_norm": 5.137801170349121, "learning_rate": 1.950703732319049e-06, "loss": 0.6467, "step": 2820 }, { "epoch": 0.41, "grad_norm": 5.671684265136719, "learning_rate": 1.9506546368789966e-06, "loss": 0.6871, "step": 2821 }, { "epoch": 0.41, "grad_norm": 5.4953694343566895, "learning_rate": 1.950605517621837e-06, "loss": 0.7292, "step": 2822 }, { "epoch": 0.41, "grad_norm": 6.38952112197876, "learning_rate": 1.9505563745488006e-06, "loss": 0.7459, "step": 2823 }, { "epoch": 0.41, "grad_norm": 6.1914143562316895, "learning_rate": 1.9505072076611187e-06, "loss": 0.7312, "step": 2824 }, { "epoch": 0.41, "grad_norm": 6.070630073547363, "learning_rate": 1.9504580169600233e-06, "loss": 0.7516, "step": 2825 }, { "epoch": 0.41, "grad_norm": 6.209557056427002, "learning_rate": 1.9504088024467463e-06, "loss": 0.783, "step": 2826 }, { "epoch": 0.41, "grad_norm": 7.260295867919922, "learning_rate": 1.9503595641225215e-06, "loss": 0.7937, "step": 2827 }, { "epoch": 0.41, "grad_norm": 6.174968242645264, "learning_rate": 1.9503103019885817e-06, "loss": 0.7719, "step": 2828 }, { "epoch": 0.41, "grad_norm": 7.068309783935547, "learning_rate": 1.9502610160461614e-06, "loss": 0.7345, "step": 2829 }, { "epoch": 0.41, "grad_norm": 5.295310974121094, "learning_rate": 1.9502117062964953e-06, "loss": 0.6867, "step": 2830 }, { "epoch": 0.41, "grad_norm": 5.7319512367248535, "learning_rate": 1.9501623727408187e-06, "loss": 0.8116, "step": 2831 }, { "epoch": 0.41, "grad_norm": 5.693915843963623, "learning_rate": 1.950113015380368e-06, "loss": 0.6948, "step": 2832 }, { "epoch": 0.41, "grad_norm": 6.205255031585693, "learning_rate": 1.9500636342163794e-06, "loss": 0.7135, "step": 2833 }, { "epoch": 0.41, "grad_norm": 6.672582149505615, "learning_rate": 1.9500142292500897e-06, "loss": 0.6721, "step": 2834 }, { "epoch": 0.41, "grad_norm": 5.84286642074585, "learning_rate": 1.949964800482738e-06, "loss": 0.7682, "step": 2835 }, { "epoch": 0.41, "grad_norm": 6.387214183807373, "learning_rate": 1.949915347915561e-06, "loss": 0.7136, "step": 2836 }, { "epoch": 0.41, "grad_norm": 6.010508060455322, "learning_rate": 1.9498658715497982e-06, "loss": 0.7418, "step": 2837 }, { "epoch": 0.41, "grad_norm": 5.346487998962402, "learning_rate": 1.94981637138669e-06, "loss": 0.6833, "step": 2838 }, { "epoch": 0.41, "grad_norm": 5.935553550720215, "learning_rate": 1.9497668474274756e-06, "loss": 0.6477, "step": 2839 }, { "epoch": 0.41, "grad_norm": 5.923573017120361, "learning_rate": 1.949717299673396e-06, "loss": 0.8012, "step": 2840 }, { "epoch": 0.41, "grad_norm": 5.498530864715576, "learning_rate": 1.9496677281256933e-06, "loss": 0.693, "step": 2841 }, { "epoch": 0.41, "grad_norm": 5.9583611488342285, "learning_rate": 1.949618132785608e-06, "loss": 0.7708, "step": 2842 }, { "epoch": 0.41, "grad_norm": 6.04915714263916, "learning_rate": 1.9495685136543833e-06, "loss": 0.7847, "step": 2843 }, { "epoch": 0.41, "grad_norm": 7.165902137756348, "learning_rate": 1.9495188707332626e-06, "loss": 0.7902, "step": 2844 }, { "epoch": 0.41, "grad_norm": 5.451613426208496, "learning_rate": 1.9494692040234893e-06, "loss": 0.6586, "step": 2845 }, { "epoch": 0.41, "grad_norm": 6.343653202056885, "learning_rate": 1.949419513526308e-06, "loss": 0.7962, "step": 2846 }, { "epoch": 0.41, "grad_norm": 5.852301597595215, "learning_rate": 1.949369799242963e-06, "loss": 0.703, "step": 2847 }, { "epoch": 0.41, "grad_norm": 5.766817569732666, "learning_rate": 1.949320061174701e-06, "loss": 0.6474, "step": 2848 }, { "epoch": 0.41, "grad_norm": 5.127243995666504, "learning_rate": 1.9492702993227666e-06, "loss": 0.7203, "step": 2849 }, { "epoch": 0.41, "grad_norm": 5.581573486328125, "learning_rate": 1.9492205136884075e-06, "loss": 0.7072, "step": 2850 }, { "epoch": 0.41, "grad_norm": 6.642293930053711, "learning_rate": 1.949170704272871e-06, "loss": 0.7997, "step": 2851 }, { "epoch": 0.41, "grad_norm": 5.91754150390625, "learning_rate": 1.9491208710774045e-06, "loss": 0.7349, "step": 2852 }, { "epoch": 0.41, "grad_norm": 6.3504252433776855, "learning_rate": 1.9490710141032573e-06, "loss": 0.7882, "step": 2853 }, { "epoch": 0.41, "grad_norm": 6.308306694030762, "learning_rate": 1.949021133351677e-06, "loss": 0.775, "step": 2854 }, { "epoch": 0.41, "grad_norm": 5.969082832336426, "learning_rate": 1.9489712288239146e-06, "loss": 0.7342, "step": 2855 }, { "epoch": 0.41, "grad_norm": 5.410499572753906, "learning_rate": 1.9489213005212203e-06, "loss": 0.741, "step": 2856 }, { "epoch": 0.41, "grad_norm": 5.895951747894287, "learning_rate": 1.948871348444844e-06, "loss": 0.7508, "step": 2857 }, { "epoch": 0.41, "grad_norm": 5.811308860778809, "learning_rate": 1.9488213725960383e-06, "loss": 0.7979, "step": 2858 }, { "epoch": 0.41, "grad_norm": 6.053653717041016, "learning_rate": 1.9487713729760545e-06, "loss": 0.7819, "step": 2859 }, { "epoch": 0.41, "grad_norm": 5.611668109893799, "learning_rate": 1.948721349586146e-06, "loss": 0.772, "step": 2860 }, { "epoch": 0.42, "grad_norm": 5.898468971252441, "learning_rate": 1.9486713024275652e-06, "loss": 0.6967, "step": 2861 }, { "epoch": 0.42, "grad_norm": 5.8253045082092285, "learning_rate": 1.9486212315015662e-06, "loss": 0.7385, "step": 2862 }, { "epoch": 0.42, "grad_norm": 5.811188697814941, "learning_rate": 1.948571136809404e-06, "loss": 0.7224, "step": 2863 }, { "epoch": 0.42, "grad_norm": 6.409305095672607, "learning_rate": 1.948521018352333e-06, "loss": 0.6954, "step": 2864 }, { "epoch": 0.42, "grad_norm": 6.451624393463135, "learning_rate": 1.9484708761316093e-06, "loss": 0.7107, "step": 2865 }, { "epoch": 0.42, "grad_norm": 5.634249210357666, "learning_rate": 1.9484207101484885e-06, "loss": 0.6751, "step": 2866 }, { "epoch": 0.42, "grad_norm": 6.322019100189209, "learning_rate": 1.9483705204042283e-06, "loss": 0.7554, "step": 2867 }, { "epoch": 0.42, "grad_norm": 6.361479759216309, "learning_rate": 1.9483203069000854e-06, "loss": 0.7094, "step": 2868 }, { "epoch": 0.42, "grad_norm": 5.96303653717041, "learning_rate": 1.948270069637318e-06, "loss": 0.7684, "step": 2869 }, { "epoch": 0.42, "grad_norm": 6.169341564178467, "learning_rate": 1.9482198086171847e-06, "loss": 0.717, "step": 2870 }, { "epoch": 0.42, "grad_norm": 6.775197505950928, "learning_rate": 1.948169523840945e-06, "loss": 0.7552, "step": 2871 }, { "epoch": 0.42, "grad_norm": 6.16216516494751, "learning_rate": 1.9481192153098586e-06, "loss": 0.816, "step": 2872 }, { "epoch": 0.42, "grad_norm": 5.622946739196777, "learning_rate": 1.9480688830251856e-06, "loss": 0.7313, "step": 2873 }, { "epoch": 0.42, "grad_norm": 5.8247551918029785, "learning_rate": 1.948018526988187e-06, "loss": 0.6563, "step": 2874 }, { "epoch": 0.42, "grad_norm": 6.083683490753174, "learning_rate": 1.9479681472001247e-06, "loss": 0.7459, "step": 2875 }, { "epoch": 0.42, "grad_norm": 5.690889358520508, "learning_rate": 1.9479177436622613e-06, "loss": 0.664, "step": 2876 }, { "epoch": 0.42, "grad_norm": 5.866720199584961, "learning_rate": 1.947867316375858e-06, "loss": 0.7422, "step": 2877 }, { "epoch": 0.42, "grad_norm": 6.012958526611328, "learning_rate": 1.94781686534218e-06, "loss": 0.7151, "step": 2878 }, { "epoch": 0.42, "grad_norm": 5.937986373901367, "learning_rate": 1.9477663905624905e-06, "loss": 0.7787, "step": 2879 }, { "epoch": 0.42, "grad_norm": 6.1350884437561035, "learning_rate": 1.9477158920380534e-06, "loss": 0.7551, "step": 2880 }, { "epoch": 0.42, "grad_norm": 6.612147808074951, "learning_rate": 1.947665369770135e-06, "loss": 0.7817, "step": 2881 }, { "epoch": 0.42, "grad_norm": 6.133176803588867, "learning_rate": 1.9476148237600006e-06, "loss": 0.6163, "step": 2882 }, { "epoch": 0.42, "grad_norm": 6.819246768951416, "learning_rate": 1.9475642540089166e-06, "loss": 0.8754, "step": 2883 }, { "epoch": 0.42, "grad_norm": 6.13386344909668, "learning_rate": 1.9475136605181495e-06, "loss": 0.8312, "step": 2884 }, { "epoch": 0.42, "grad_norm": 6.459657669067383, "learning_rate": 1.9474630432889673e-06, "loss": 0.8014, "step": 2885 }, { "epoch": 0.42, "grad_norm": 6.509217262268066, "learning_rate": 1.9474124023226383e-06, "loss": 0.7351, "step": 2886 }, { "epoch": 0.42, "grad_norm": 5.98253059387207, "learning_rate": 1.9473617376204307e-06, "loss": 0.6044, "step": 2887 }, { "epoch": 0.42, "grad_norm": 5.3323163986206055, "learning_rate": 1.947311049183614e-06, "loss": 0.6707, "step": 2888 }, { "epoch": 0.42, "grad_norm": 5.894948959350586, "learning_rate": 1.947260337013458e-06, "loss": 0.7788, "step": 2889 }, { "epoch": 0.42, "grad_norm": 5.765289306640625, "learning_rate": 1.9472096011112334e-06, "loss": 0.6712, "step": 2890 }, { "epoch": 0.42, "grad_norm": 7.20007848739624, "learning_rate": 1.9471588414782118e-06, "loss": 0.8138, "step": 2891 }, { "epoch": 0.42, "grad_norm": 6.070067405700684, "learning_rate": 1.9471080581156642e-06, "loss": 0.7377, "step": 2892 }, { "epoch": 0.42, "grad_norm": 5.805476188659668, "learning_rate": 1.947057251024863e-06, "loss": 0.6592, "step": 2893 }, { "epoch": 0.42, "grad_norm": 5.844421863555908, "learning_rate": 1.947006420207081e-06, "loss": 0.7169, "step": 2894 }, { "epoch": 0.42, "grad_norm": 6.096348762512207, "learning_rate": 1.946955565663592e-06, "loss": 0.7976, "step": 2895 }, { "epoch": 0.42, "grad_norm": 6.3184614181518555, "learning_rate": 1.94690468739567e-06, "loss": 0.7517, "step": 2896 }, { "epoch": 0.42, "grad_norm": 6.009201526641846, "learning_rate": 1.94685378540459e-06, "loss": 0.7664, "step": 2897 }, { "epoch": 0.42, "grad_norm": 6.202377796173096, "learning_rate": 1.946802859691626e-06, "loss": 0.6789, "step": 2898 }, { "epoch": 0.42, "grad_norm": 5.792273998260498, "learning_rate": 1.9467519102580553e-06, "loss": 0.7692, "step": 2899 }, { "epoch": 0.42, "grad_norm": 5.590024948120117, "learning_rate": 1.946700937105154e-06, "loss": 0.7322, "step": 2900 }, { "epoch": 0.42, "grad_norm": 6.61845588684082, "learning_rate": 1.9466499402341983e-06, "loss": 0.7978, "step": 2901 }, { "epoch": 0.42, "grad_norm": 6.699134349822998, "learning_rate": 1.946598919646467e-06, "loss": 0.8735, "step": 2902 }, { "epoch": 0.42, "grad_norm": 5.8087263107299805, "learning_rate": 1.9465478753432377e-06, "loss": 0.6767, "step": 2903 }, { "epoch": 0.42, "grad_norm": 6.086528778076172, "learning_rate": 1.9464968073257895e-06, "loss": 0.7564, "step": 2904 }, { "epoch": 0.42, "grad_norm": 6.817932605743408, "learning_rate": 1.9464457155954015e-06, "loss": 0.7428, "step": 2905 }, { "epoch": 0.42, "grad_norm": 5.962221622467041, "learning_rate": 1.946394600153354e-06, "loss": 0.6883, "step": 2906 }, { "epoch": 0.42, "grad_norm": 5.359172821044922, "learning_rate": 1.9463434610009274e-06, "loss": 0.6763, "step": 2907 }, { "epoch": 0.42, "grad_norm": 5.5830888748168945, "learning_rate": 1.946292298139403e-06, "loss": 0.7297, "step": 2908 }, { "epoch": 0.42, "grad_norm": 5.9562835693359375, "learning_rate": 1.946241111570063e-06, "loss": 0.7443, "step": 2909 }, { "epoch": 0.42, "grad_norm": 5.470738887786865, "learning_rate": 1.9461899012941887e-06, "loss": 0.7039, "step": 2910 }, { "epoch": 0.42, "grad_norm": 6.3882737159729, "learning_rate": 1.9461386673130647e-06, "loss": 0.7597, "step": 2911 }, { "epoch": 0.42, "grad_norm": 5.536895275115967, "learning_rate": 1.946087409627973e-06, "loss": 0.7919, "step": 2912 }, { "epoch": 0.42, "grad_norm": 6.002549648284912, "learning_rate": 1.9460361282401993e-06, "loss": 0.6976, "step": 2913 }, { "epoch": 0.42, "grad_norm": 5.949002742767334, "learning_rate": 1.9459848231510266e-06, "loss": 0.7813, "step": 2914 }, { "epoch": 0.42, "grad_norm": 6.588278293609619, "learning_rate": 1.945933494361742e-06, "loss": 0.6574, "step": 2915 }, { "epoch": 0.42, "grad_norm": 6.05366849899292, "learning_rate": 1.9458821418736304e-06, "loss": 0.76, "step": 2916 }, { "epoch": 0.42, "grad_norm": 5.715183734893799, "learning_rate": 1.945830765687978e-06, "loss": 0.7487, "step": 2917 }, { "epoch": 0.42, "grad_norm": 5.172585487365723, "learning_rate": 1.9457793658060735e-06, "loss": 0.6869, "step": 2918 }, { "epoch": 0.42, "grad_norm": 6.0471038818359375, "learning_rate": 1.945727942229203e-06, "loss": 0.7054, "step": 2919 }, { "epoch": 0.42, "grad_norm": 7.791501045227051, "learning_rate": 1.9456764949586566e-06, "loss": 0.6961, "step": 2920 }, { "epoch": 0.42, "grad_norm": 5.591205596923828, "learning_rate": 1.945625023995721e-06, "loss": 0.7134, "step": 2921 }, { "epoch": 0.42, "grad_norm": 7.30221700668335, "learning_rate": 1.9455735293416877e-06, "loss": 0.7428, "step": 2922 }, { "epoch": 0.42, "grad_norm": 5.540031433105469, "learning_rate": 1.9455220109978457e-06, "loss": 0.7314, "step": 2923 }, { "epoch": 0.42, "grad_norm": 6.235821723937988, "learning_rate": 1.9454704689654857e-06, "loss": 0.7711, "step": 2924 }, { "epoch": 0.42, "grad_norm": 5.759390354156494, "learning_rate": 1.9454189032458997e-06, "loss": 0.762, "step": 2925 }, { "epoch": 0.42, "grad_norm": 6.339443683624268, "learning_rate": 1.9453673138403795e-06, "loss": 0.7896, "step": 2926 }, { "epoch": 0.42, "grad_norm": 7.009243488311768, "learning_rate": 1.9453157007502167e-06, "loss": 0.7818, "step": 2927 }, { "epoch": 0.42, "grad_norm": 5.913127899169922, "learning_rate": 1.9452640639767057e-06, "loss": 0.7993, "step": 2928 }, { "epoch": 0.42, "grad_norm": 6.3307929039001465, "learning_rate": 1.945212403521139e-06, "loss": 0.7395, "step": 2929 }, { "epoch": 0.43, "grad_norm": 6.409564018249512, "learning_rate": 1.9451607193848112e-06, "loss": 0.7075, "step": 2930 }, { "epoch": 0.43, "grad_norm": 5.688034534454346, "learning_rate": 1.9451090115690177e-06, "loss": 0.745, "step": 2931 }, { "epoch": 0.43, "grad_norm": 5.909750461578369, "learning_rate": 1.945057280075053e-06, "loss": 0.6976, "step": 2932 }, { "epoch": 0.43, "grad_norm": 6.398123741149902, "learning_rate": 1.945005524904214e-06, "loss": 0.7121, "step": 2933 }, { "epoch": 0.43, "grad_norm": 6.282713890075684, "learning_rate": 1.9449537460577974e-06, "loss": 0.667, "step": 2934 }, { "epoch": 0.43, "grad_norm": 6.341833591461182, "learning_rate": 1.9449019435370995e-06, "loss": 0.7874, "step": 2935 }, { "epoch": 0.43, "grad_norm": 6.893148899078369, "learning_rate": 1.9448501173434193e-06, "loss": 0.8015, "step": 2936 }, { "epoch": 0.43, "grad_norm": 6.431652545928955, "learning_rate": 1.944798267478054e-06, "loss": 0.7258, "step": 2937 }, { "epoch": 0.43, "grad_norm": 6.452258110046387, "learning_rate": 1.944746393942304e-06, "loss": 0.8046, "step": 2938 }, { "epoch": 0.43, "grad_norm": 5.759305000305176, "learning_rate": 1.9446944967374677e-06, "loss": 0.7084, "step": 2939 }, { "epoch": 0.43, "grad_norm": 6.646994113922119, "learning_rate": 1.944642575864846e-06, "loss": 0.6774, "step": 2940 }, { "epoch": 0.43, "grad_norm": 6.03286600112915, "learning_rate": 1.9445906313257388e-06, "loss": 0.7624, "step": 2941 }, { "epoch": 0.43, "grad_norm": 6.346606254577637, "learning_rate": 1.944538663121449e-06, "loss": 0.7607, "step": 2942 }, { "epoch": 0.43, "grad_norm": 5.674247741699219, "learning_rate": 1.944486671253277e-06, "loss": 0.6868, "step": 2943 }, { "epoch": 0.43, "grad_norm": 5.768939018249512, "learning_rate": 1.9444346557225263e-06, "loss": 0.6858, "step": 2944 }, { "epoch": 0.43, "grad_norm": 5.889585018157959, "learning_rate": 1.9443826165305e-06, "loss": 0.7026, "step": 2945 }, { "epoch": 0.43, "grad_norm": 5.813731670379639, "learning_rate": 1.9443305536785017e-06, "loss": 0.7671, "step": 2946 }, { "epoch": 0.43, "grad_norm": 5.622334003448486, "learning_rate": 1.9442784671678354e-06, "loss": 0.6553, "step": 2947 }, { "epoch": 0.43, "grad_norm": 5.876180648803711, "learning_rate": 1.9442263569998066e-06, "loss": 0.7117, "step": 2948 }, { "epoch": 0.43, "grad_norm": 5.418308258056641, "learning_rate": 1.944174223175721e-06, "loss": 0.6578, "step": 2949 }, { "epoch": 0.43, "grad_norm": 5.722068786621094, "learning_rate": 1.944122065696884e-06, "loss": 0.728, "step": 2950 }, { "epoch": 0.43, "grad_norm": 5.719003200531006, "learning_rate": 1.9440698845646027e-06, "loss": 0.6663, "step": 2951 }, { "epoch": 0.43, "grad_norm": 5.663257122039795, "learning_rate": 1.944017679780184e-06, "loss": 0.7151, "step": 2952 }, { "epoch": 0.43, "grad_norm": 5.383286952972412, "learning_rate": 1.943965451344937e-06, "loss": 0.6457, "step": 2953 }, { "epoch": 0.43, "grad_norm": 6.08974027633667, "learning_rate": 1.943913199260169e-06, "loss": 0.7481, "step": 2954 }, { "epoch": 0.43, "grad_norm": 6.49755859375, "learning_rate": 1.9438609235271894e-06, "loss": 0.7588, "step": 2955 }, { "epoch": 0.43, "grad_norm": 5.453999042510986, "learning_rate": 1.9438086241473082e-06, "loss": 0.655, "step": 2956 }, { "epoch": 0.43, "grad_norm": 5.463875770568848, "learning_rate": 1.9437563011218352e-06, "loss": 0.7262, "step": 2957 }, { "epoch": 0.43, "grad_norm": 5.525237560272217, "learning_rate": 1.9437039544520817e-06, "loss": 0.6729, "step": 2958 }, { "epoch": 0.43, "grad_norm": 5.493068695068359, "learning_rate": 1.943651584139359e-06, "loss": 0.7327, "step": 2959 }, { "epoch": 0.43, "grad_norm": 5.528882026672363, "learning_rate": 1.943599190184979e-06, "loss": 0.7171, "step": 2960 }, { "epoch": 0.43, "grad_norm": 5.966179370880127, "learning_rate": 1.9435467725902544e-06, "loss": 0.7902, "step": 2961 }, { "epoch": 0.43, "grad_norm": 5.2706618309021, "learning_rate": 1.9434943313564988e-06, "loss": 0.7486, "step": 2962 }, { "epoch": 0.43, "grad_norm": 5.558921813964844, "learning_rate": 1.943441866485026e-06, "loss": 0.6876, "step": 2963 }, { "epoch": 0.43, "grad_norm": 6.274701118469238, "learning_rate": 1.9433893779771493e-06, "loss": 0.8296, "step": 2964 }, { "epoch": 0.43, "grad_norm": 6.146193504333496, "learning_rate": 1.943336865834185e-06, "loss": 0.7407, "step": 2965 }, { "epoch": 0.43, "grad_norm": 5.585958957672119, "learning_rate": 1.9432843300574485e-06, "loss": 0.7507, "step": 2966 }, { "epoch": 0.43, "grad_norm": 5.842879295349121, "learning_rate": 1.9432317706482556e-06, "loss": 0.749, "step": 2967 }, { "epoch": 0.43, "grad_norm": 5.321963310241699, "learning_rate": 1.943179187607923e-06, "loss": 0.6894, "step": 2968 }, { "epoch": 0.43, "grad_norm": 6.641183853149414, "learning_rate": 1.9431265809377687e-06, "loss": 0.7832, "step": 2969 }, { "epoch": 0.43, "grad_norm": 6.143787384033203, "learning_rate": 1.9430739506391103e-06, "loss": 0.7492, "step": 2970 }, { "epoch": 0.43, "grad_norm": 6.184420585632324, "learning_rate": 1.9430212967132666e-06, "loss": 0.6466, "step": 2971 }, { "epoch": 0.43, "grad_norm": 7.014873027801514, "learning_rate": 1.9429686191615563e-06, "loss": 0.7584, "step": 2972 }, { "epoch": 0.43, "grad_norm": 6.532639503479004, "learning_rate": 1.9429159179852993e-06, "loss": 0.6951, "step": 2973 }, { "epoch": 0.43, "grad_norm": 7.075316429138184, "learning_rate": 1.9428631931858164e-06, "loss": 0.8321, "step": 2974 }, { "epoch": 0.43, "grad_norm": 5.708621978759766, "learning_rate": 1.9428104447644282e-06, "loss": 0.7284, "step": 2975 }, { "epoch": 0.43, "grad_norm": 5.493144512176514, "learning_rate": 1.9427576727224556e-06, "loss": 0.64, "step": 2976 }, { "epoch": 0.43, "grad_norm": 5.744162082672119, "learning_rate": 1.942704877061222e-06, "loss": 0.7138, "step": 2977 }, { "epoch": 0.43, "grad_norm": 5.382176399230957, "learning_rate": 1.942652057782049e-06, "loss": 0.6812, "step": 2978 }, { "epoch": 0.43, "grad_norm": 5.600236415863037, "learning_rate": 1.9425992148862605e-06, "loss": 0.7256, "step": 2979 }, { "epoch": 0.43, "grad_norm": 5.8564019203186035, "learning_rate": 1.94254634837518e-06, "loss": 0.6981, "step": 2980 }, { "epoch": 0.43, "grad_norm": 5.4513702392578125, "learning_rate": 1.942493458250133e-06, "loss": 0.7179, "step": 2981 }, { "epoch": 0.43, "grad_norm": 6.512867450714111, "learning_rate": 1.942440544512443e-06, "loss": 0.6716, "step": 2982 }, { "epoch": 0.43, "grad_norm": 6.340700626373291, "learning_rate": 1.942387607163437e-06, "loss": 0.8029, "step": 2983 }, { "epoch": 0.43, "grad_norm": 5.794502258300781, "learning_rate": 1.9423346462044404e-06, "loss": 0.7349, "step": 2984 }, { "epoch": 0.43, "grad_norm": 5.280069828033447, "learning_rate": 1.94228166163678e-06, "loss": 0.7826, "step": 2985 }, { "epoch": 0.43, "grad_norm": 6.127971172332764, "learning_rate": 1.942228653461784e-06, "loss": 0.7627, "step": 2986 }, { "epoch": 0.43, "grad_norm": 5.691142559051514, "learning_rate": 1.9421756216807805e-06, "loss": 0.6936, "step": 2987 }, { "epoch": 0.43, "grad_norm": 6.038427352905273, "learning_rate": 1.9421225662950973e-06, "loss": 0.7228, "step": 2988 }, { "epoch": 0.43, "grad_norm": 6.56964635848999, "learning_rate": 1.942069487306064e-06, "loss": 0.7082, "step": 2989 }, { "epoch": 0.43, "grad_norm": 6.10171365737915, "learning_rate": 1.94201638471501e-06, "loss": 0.6903, "step": 2990 }, { "epoch": 0.43, "grad_norm": 6.528895854949951, "learning_rate": 1.9419632585232666e-06, "loss": 0.7901, "step": 2991 }, { "epoch": 0.43, "grad_norm": 5.452149391174316, "learning_rate": 1.941910108732164e-06, "loss": 0.671, "step": 2992 }, { "epoch": 0.43, "grad_norm": 6.625447750091553, "learning_rate": 1.941856935343034e-06, "loss": 0.6656, "step": 2993 }, { "epoch": 0.43, "grad_norm": 6.642675399780273, "learning_rate": 1.9418037383572095e-06, "loss": 0.7415, "step": 2994 }, { "epoch": 0.43, "grad_norm": 5.956177234649658, "learning_rate": 1.941750517776022e-06, "loss": 0.7284, "step": 2995 }, { "epoch": 0.43, "grad_norm": 5.374540328979492, "learning_rate": 1.9416972736008057e-06, "loss": 0.7087, "step": 2996 }, { "epoch": 0.43, "grad_norm": 6.064086437225342, "learning_rate": 1.9416440058328943e-06, "loss": 0.8092, "step": 2997 }, { "epoch": 0.43, "grad_norm": 5.408316135406494, "learning_rate": 1.941590714473622e-06, "loss": 0.6882, "step": 2998 }, { "epoch": 0.44, "grad_norm": 5.513706684112549, "learning_rate": 1.9415373995243246e-06, "loss": 0.7056, "step": 2999 }, { "epoch": 0.44, "grad_norm": 6.493756294250488, "learning_rate": 1.9414840609863374e-06, "loss": 0.8143, "step": 3000 }, { "epoch": 0.44, "grad_norm": 6.049862861633301, "learning_rate": 1.941430698860997e-06, "loss": 0.7376, "step": 3001 }, { "epoch": 0.44, "grad_norm": 5.4664177894592285, "learning_rate": 1.9413773131496397e-06, "loss": 0.6852, "step": 3002 }, { "epoch": 0.44, "grad_norm": 5.684367656707764, "learning_rate": 1.9413239038536037e-06, "loss": 0.7779, "step": 3003 }, { "epoch": 0.44, "grad_norm": 5.72143030166626, "learning_rate": 1.9412704709742266e-06, "loss": 0.7098, "step": 3004 }, { "epoch": 0.44, "grad_norm": 5.940935134887695, "learning_rate": 1.9412170145128474e-06, "loss": 0.7613, "step": 3005 }, { "epoch": 0.44, "grad_norm": 6.635117053985596, "learning_rate": 1.9411635344708053e-06, "loss": 0.7435, "step": 3006 }, { "epoch": 0.44, "grad_norm": 5.431354522705078, "learning_rate": 1.94111003084944e-06, "loss": 0.7155, "step": 3007 }, { "epoch": 0.44, "grad_norm": 5.96950626373291, "learning_rate": 1.941056503650092e-06, "loss": 0.7624, "step": 3008 }, { "epoch": 0.44, "grad_norm": 6.618959426879883, "learning_rate": 1.9410029528741023e-06, "loss": 0.7152, "step": 3009 }, { "epoch": 0.44, "grad_norm": 5.788949966430664, "learning_rate": 1.9409493785228126e-06, "loss": 0.7998, "step": 3010 }, { "epoch": 0.44, "grad_norm": 6.075585842132568, "learning_rate": 1.940895780597565e-06, "loss": 0.7342, "step": 3011 }, { "epoch": 0.44, "grad_norm": 5.512252330780029, "learning_rate": 1.940842159099703e-06, "loss": 0.7796, "step": 3012 }, { "epoch": 0.44, "grad_norm": 6.341228485107422, "learning_rate": 1.9407885140305687e-06, "loss": 0.7685, "step": 3013 }, { "epoch": 0.44, "grad_norm": 5.993168354034424, "learning_rate": 1.940734845391507e-06, "loss": 0.7295, "step": 3014 }, { "epoch": 0.44, "grad_norm": 6.204832077026367, "learning_rate": 1.940681153183862e-06, "loss": 0.7747, "step": 3015 }, { "epoch": 0.44, "grad_norm": 5.815570831298828, "learning_rate": 1.9406274374089797e-06, "loss": 0.7648, "step": 3016 }, { "epoch": 0.44, "grad_norm": 5.523262023925781, "learning_rate": 1.940573698068205e-06, "loss": 0.6421, "step": 3017 }, { "epoch": 0.44, "grad_norm": 5.9119391441345215, "learning_rate": 1.9405199351628847e-06, "loss": 0.7903, "step": 3018 }, { "epoch": 0.44, "grad_norm": 5.356391906738281, "learning_rate": 1.9404661486943653e-06, "loss": 0.7052, "step": 3019 }, { "epoch": 0.44, "grad_norm": 6.477714538574219, "learning_rate": 1.9404123386639952e-06, "loss": 0.7207, "step": 3020 }, { "epoch": 0.44, "grad_norm": 7.30403995513916, "learning_rate": 1.9403585050731216e-06, "loss": 0.7859, "step": 3021 }, { "epoch": 0.44, "grad_norm": 6.443980693817139, "learning_rate": 1.9403046479230934e-06, "loss": 0.7023, "step": 3022 }, { "epoch": 0.44, "grad_norm": 5.801497936248779, "learning_rate": 1.9402507672152604e-06, "loss": 0.7489, "step": 3023 }, { "epoch": 0.44, "grad_norm": 5.275594711303711, "learning_rate": 1.9401968629509723e-06, "loss": 0.6674, "step": 3024 }, { "epoch": 0.44, "grad_norm": 6.24968957901001, "learning_rate": 1.9401429351315796e-06, "loss": 0.7263, "step": 3025 }, { "epoch": 0.44, "grad_norm": 6.285468578338623, "learning_rate": 1.9400889837584327e-06, "loss": 0.7573, "step": 3026 }, { "epoch": 0.44, "grad_norm": 6.151015758514404, "learning_rate": 1.9400350088328837e-06, "loss": 0.8401, "step": 3027 }, { "epoch": 0.44, "grad_norm": 6.307131767272949, "learning_rate": 1.9399810103562853e-06, "loss": 0.8746, "step": 3028 }, { "epoch": 0.44, "grad_norm": 5.962406635284424, "learning_rate": 1.93992698832999e-06, "loss": 0.6825, "step": 3029 }, { "epoch": 0.44, "grad_norm": 5.820333957672119, "learning_rate": 1.939872942755351e-06, "loss": 0.8475, "step": 3030 }, { "epoch": 0.44, "grad_norm": 5.689323902130127, "learning_rate": 1.9398188736337227e-06, "loss": 0.6852, "step": 3031 }, { "epoch": 0.44, "grad_norm": 7.634704113006592, "learning_rate": 1.9397647809664597e-06, "loss": 0.8535, "step": 3032 }, { "epoch": 0.44, "grad_norm": 5.279076099395752, "learning_rate": 1.9397106647549172e-06, "loss": 0.6973, "step": 3033 }, { "epoch": 0.44, "grad_norm": 5.894764423370361, "learning_rate": 1.9396565250004506e-06, "loss": 0.7352, "step": 3034 }, { "epoch": 0.44, "grad_norm": 6.3531012535095215, "learning_rate": 1.9396023617044163e-06, "loss": 0.7125, "step": 3035 }, { "epoch": 0.44, "grad_norm": 6.067461013793945, "learning_rate": 1.9395481748681718e-06, "loss": 0.7543, "step": 3036 }, { "epoch": 0.44, "grad_norm": 5.515002727508545, "learning_rate": 1.9394939644930743e-06, "loss": 0.6995, "step": 3037 }, { "epoch": 0.44, "grad_norm": 5.707164764404297, "learning_rate": 1.939439730580482e-06, "loss": 0.7416, "step": 3038 }, { "epoch": 0.44, "grad_norm": 5.607475280761719, "learning_rate": 1.939385473131754e-06, "loss": 0.6861, "step": 3039 }, { "epoch": 0.44, "grad_norm": 5.932619094848633, "learning_rate": 1.9393311921482493e-06, "loss": 0.6935, "step": 3040 }, { "epoch": 0.44, "grad_norm": 5.5870747566223145, "learning_rate": 1.9392768876313275e-06, "loss": 0.7189, "step": 3041 }, { "epoch": 0.44, "grad_norm": 6.410589218139648, "learning_rate": 1.9392225595823493e-06, "loss": 0.785, "step": 3042 }, { "epoch": 0.44, "grad_norm": 5.419177532196045, "learning_rate": 1.9391682080026765e-06, "loss": 0.7061, "step": 3043 }, { "epoch": 0.44, "grad_norm": 5.680371284484863, "learning_rate": 1.9391138328936697e-06, "loss": 0.6456, "step": 3044 }, { "epoch": 0.44, "grad_norm": 6.016870498657227, "learning_rate": 1.9390594342566923e-06, "loss": 0.7955, "step": 3045 }, { "epoch": 0.44, "grad_norm": 6.691929340362549, "learning_rate": 1.9390050120931062e-06, "loss": 0.7741, "step": 3046 }, { "epoch": 0.44, "grad_norm": 5.404856204986572, "learning_rate": 1.9389505664042753e-06, "loss": 0.768, "step": 3047 }, { "epoch": 0.44, "grad_norm": 5.919961452484131, "learning_rate": 1.9388960971915636e-06, "loss": 0.7433, "step": 3048 }, { "epoch": 0.44, "grad_norm": 6.1143412590026855, "learning_rate": 1.9388416044563357e-06, "loss": 0.7698, "step": 3049 }, { "epoch": 0.44, "grad_norm": 5.698861122131348, "learning_rate": 1.9387870881999567e-06, "loss": 0.7564, "step": 3050 }, { "epoch": 0.44, "grad_norm": 6.040593147277832, "learning_rate": 1.9387325484237927e-06, "loss": 0.7549, "step": 3051 }, { "epoch": 0.44, "grad_norm": 6.451703071594238, "learning_rate": 1.9386779851292105e-06, "loss": 0.9298, "step": 3052 }, { "epoch": 0.44, "grad_norm": 5.705104351043701, "learning_rate": 1.938623398317576e-06, "loss": 0.6628, "step": 3053 }, { "epoch": 0.44, "grad_norm": 5.771088123321533, "learning_rate": 1.9385687879902572e-06, "loss": 0.7455, "step": 3054 }, { "epoch": 0.44, "grad_norm": 5.898204803466797, "learning_rate": 1.9385141541486226e-06, "loss": 0.7693, "step": 3055 }, { "epoch": 0.44, "grad_norm": 7.626099586486816, "learning_rate": 1.938459496794041e-06, "loss": 0.8144, "step": 3056 }, { "epoch": 0.44, "grad_norm": 5.876559257507324, "learning_rate": 1.9384048159278815e-06, "loss": 0.6522, "step": 3057 }, { "epoch": 0.44, "grad_norm": 5.535085201263428, "learning_rate": 1.938350111551514e-06, "loss": 0.7497, "step": 3058 }, { "epoch": 0.44, "grad_norm": 7.481646537780762, "learning_rate": 1.938295383666309e-06, "loss": 0.7922, "step": 3059 }, { "epoch": 0.44, "grad_norm": 6.538514614105225, "learning_rate": 1.938240632273638e-06, "loss": 0.7539, "step": 3060 }, { "epoch": 0.44, "grad_norm": 6.482182025909424, "learning_rate": 1.938185857374872e-06, "loss": 0.7356, "step": 3061 }, { "epoch": 0.44, "grad_norm": 6.905572891235352, "learning_rate": 1.9381310589713845e-06, "loss": 0.7246, "step": 3062 }, { "epoch": 0.44, "grad_norm": 5.714797496795654, "learning_rate": 1.938076237064547e-06, "loss": 0.7484, "step": 3063 }, { "epoch": 0.44, "grad_norm": 6.2687835693359375, "learning_rate": 1.9380213916557336e-06, "loss": 0.7261, "step": 3064 }, { "epoch": 0.44, "grad_norm": 5.9646148681640625, "learning_rate": 1.937966522746319e-06, "loss": 0.7499, "step": 3065 }, { "epoch": 0.44, "grad_norm": 5.60497522354126, "learning_rate": 1.937911630337676e-06, "loss": 0.6334, "step": 3066 }, { "epoch": 0.45, "grad_norm": 5.982097148895264, "learning_rate": 1.937856714431182e-06, "loss": 0.7019, "step": 3067 }, { "epoch": 0.45, "grad_norm": 7.3753790855407715, "learning_rate": 1.937801775028212e-06, "loss": 0.7796, "step": 3068 }, { "epoch": 0.45, "grad_norm": 7.100668907165527, "learning_rate": 1.937746812130142e-06, "loss": 0.7417, "step": 3069 }, { "epoch": 0.45, "grad_norm": 5.952681541442871, "learning_rate": 1.937691825738349e-06, "loss": 0.757, "step": 3070 }, { "epoch": 0.45, "grad_norm": 6.023703575134277, "learning_rate": 1.9376368158542113e-06, "loss": 0.732, "step": 3071 }, { "epoch": 0.45, "grad_norm": 6.473233699798584, "learning_rate": 1.9375817824791065e-06, "loss": 0.7701, "step": 3072 }, { "epoch": 0.45, "grad_norm": 5.685151100158691, "learning_rate": 1.9375267256144135e-06, "loss": 0.7098, "step": 3073 }, { "epoch": 0.45, "grad_norm": 6.469274044036865, "learning_rate": 1.9374716452615117e-06, "loss": 0.7399, "step": 3074 }, { "epoch": 0.45, "grad_norm": 6.305997371673584, "learning_rate": 1.937416541421781e-06, "loss": 0.7416, "step": 3075 }, { "epoch": 0.45, "grad_norm": 6.006763458251953, "learning_rate": 1.9373614140966023e-06, "loss": 0.7497, "step": 3076 }, { "epoch": 0.45, "grad_norm": 6.569715976715088, "learning_rate": 1.9373062632873564e-06, "loss": 0.8751, "step": 3077 }, { "epoch": 0.45, "grad_norm": 5.667377948760986, "learning_rate": 1.9372510889954247e-06, "loss": 0.7483, "step": 3078 }, { "epoch": 0.45, "grad_norm": 6.657388210296631, "learning_rate": 1.93719589122219e-06, "loss": 0.777, "step": 3079 }, { "epoch": 0.45, "grad_norm": 6.26107931137085, "learning_rate": 1.937140669969035e-06, "loss": 0.774, "step": 3080 }, { "epoch": 0.45, "grad_norm": 6.279524326324463, "learning_rate": 1.9370854252373433e-06, "loss": 0.7432, "step": 3081 }, { "epoch": 0.45, "grad_norm": 5.456998825073242, "learning_rate": 1.9370301570284987e-06, "loss": 0.7158, "step": 3082 }, { "epoch": 0.45, "grad_norm": 5.913636684417725, "learning_rate": 1.9369748653438865e-06, "loss": 0.7158, "step": 3083 }, { "epoch": 0.45, "grad_norm": 6.244728088378906, "learning_rate": 1.936919550184891e-06, "loss": 0.7584, "step": 3084 }, { "epoch": 0.45, "grad_norm": 6.2975921630859375, "learning_rate": 1.9368642115528986e-06, "loss": 0.7348, "step": 3085 }, { "epoch": 0.45, "grad_norm": 5.6276326179504395, "learning_rate": 1.9368088494492955e-06, "loss": 0.7708, "step": 3086 }, { "epoch": 0.45, "grad_norm": 5.860891819000244, "learning_rate": 1.936753463875469e-06, "loss": 0.7599, "step": 3087 }, { "epoch": 0.45, "grad_norm": 5.663794994354248, "learning_rate": 1.9366980548328066e-06, "loss": 0.7101, "step": 3088 }, { "epoch": 0.45, "grad_norm": 5.809958457946777, "learning_rate": 1.9366426223226963e-06, "loss": 0.7384, "step": 3089 }, { "epoch": 0.45, "grad_norm": 6.386214733123779, "learning_rate": 1.936587166346527e-06, "loss": 0.6952, "step": 3090 }, { "epoch": 0.45, "grad_norm": 5.477075099945068, "learning_rate": 1.9365316869056883e-06, "loss": 0.7356, "step": 3091 }, { "epoch": 0.45, "grad_norm": 5.976676940917969, "learning_rate": 1.936476184001569e-06, "loss": 0.7544, "step": 3092 }, { "epoch": 0.45, "grad_norm": 5.596888542175293, "learning_rate": 1.9364206576355615e-06, "loss": 0.7043, "step": 3093 }, { "epoch": 0.45, "grad_norm": 6.901814937591553, "learning_rate": 1.9363651078090558e-06, "loss": 0.6677, "step": 3094 }, { "epoch": 0.45, "grad_norm": 5.826625347137451, "learning_rate": 1.9363095345234436e-06, "loss": 0.8346, "step": 3095 }, { "epoch": 0.45, "grad_norm": 7.152881145477295, "learning_rate": 1.9362539377801174e-06, "loss": 0.8351, "step": 3096 }, { "epoch": 0.45, "grad_norm": 5.504403591156006, "learning_rate": 1.93619831758047e-06, "loss": 0.7144, "step": 3097 }, { "epoch": 0.45, "grad_norm": 6.283389091491699, "learning_rate": 1.9361426739258948e-06, "loss": 0.7717, "step": 3098 }, { "epoch": 0.45, "grad_norm": 6.025872230529785, "learning_rate": 1.936087006817786e-06, "loss": 0.7406, "step": 3099 }, { "epoch": 0.45, "grad_norm": 5.6991095542907715, "learning_rate": 1.936031316257538e-06, "loss": 0.7071, "step": 3100 }, { "epoch": 0.45, "grad_norm": 5.438429832458496, "learning_rate": 1.9359756022465467e-06, "loss": 0.7316, "step": 3101 }, { "epoch": 0.45, "grad_norm": 6.27431583404541, "learning_rate": 1.9359198647862075e-06, "loss": 0.6884, "step": 3102 }, { "epoch": 0.45, "grad_norm": 6.480736255645752, "learning_rate": 1.9358641038779165e-06, "loss": 0.7254, "step": 3103 }, { "epoch": 0.45, "grad_norm": 5.855846405029297, "learning_rate": 1.935808319523071e-06, "loss": 0.7264, "step": 3104 }, { "epoch": 0.45, "grad_norm": 5.54074239730835, "learning_rate": 1.935752511723069e-06, "loss": 0.7618, "step": 3105 }, { "epoch": 0.45, "grad_norm": 5.647684574127197, "learning_rate": 1.935696680479308e-06, "loss": 0.6816, "step": 3106 }, { "epoch": 0.45, "grad_norm": 5.721245765686035, "learning_rate": 1.935640825793187e-06, "loss": 0.7458, "step": 3107 }, { "epoch": 0.45, "grad_norm": 6.356338024139404, "learning_rate": 1.9355849476661053e-06, "loss": 0.7214, "step": 3108 }, { "epoch": 0.45, "grad_norm": 6.078639507293701, "learning_rate": 1.935529046099463e-06, "loss": 0.7215, "step": 3109 }, { "epoch": 0.45, "grad_norm": 6.2896857261657715, "learning_rate": 1.9354731210946603e-06, "loss": 0.7167, "step": 3110 }, { "epoch": 0.45, "grad_norm": 6.463398456573486, "learning_rate": 1.9354171726530986e-06, "loss": 0.6333, "step": 3111 }, { "epoch": 0.45, "grad_norm": 5.65001916885376, "learning_rate": 1.93536120077618e-06, "loss": 0.7425, "step": 3112 }, { "epoch": 0.45, "grad_norm": 6.281607151031494, "learning_rate": 1.9353052054653058e-06, "loss": 0.7502, "step": 3113 }, { "epoch": 0.45, "grad_norm": 6.618834018707275, "learning_rate": 1.9352491867218796e-06, "loss": 0.7709, "step": 3114 }, { "epoch": 0.45, "grad_norm": 6.413399696350098, "learning_rate": 1.9351931445473047e-06, "loss": 0.746, "step": 3115 }, { "epoch": 0.45, "grad_norm": 5.559230804443359, "learning_rate": 1.9351370789429848e-06, "loss": 0.8011, "step": 3116 }, { "epoch": 0.45, "grad_norm": 6.102116584777832, "learning_rate": 1.9350809899103252e-06, "loss": 0.8219, "step": 3117 }, { "epoch": 0.45, "grad_norm": 6.01672887802124, "learning_rate": 1.9350248774507304e-06, "loss": 0.7569, "step": 3118 }, { "epoch": 0.45, "grad_norm": 6.441464424133301, "learning_rate": 1.9349687415656068e-06, "loss": 0.6694, "step": 3119 }, { "epoch": 0.45, "grad_norm": 5.832837104797363, "learning_rate": 1.9349125822563605e-06, "loss": 0.7446, "step": 3120 }, { "epoch": 0.45, "grad_norm": 5.446077823638916, "learning_rate": 1.9348563995243983e-06, "loss": 0.7534, "step": 3121 }, { "epoch": 0.45, "grad_norm": 7.308160781860352, "learning_rate": 1.934800193371128e-06, "loss": 0.7407, "step": 3122 }, { "epoch": 0.45, "grad_norm": 6.160983085632324, "learning_rate": 1.934743963797958e-06, "loss": 0.7643, "step": 3123 }, { "epoch": 0.45, "grad_norm": 5.701531410217285, "learning_rate": 1.934687710806297e-06, "loss": 0.7105, "step": 3124 }, { "epoch": 0.45, "grad_norm": 5.1844329833984375, "learning_rate": 1.9346314343975536e-06, "loss": 0.6764, "step": 3125 }, { "epoch": 0.45, "grad_norm": 5.5551981925964355, "learning_rate": 1.9345751345731384e-06, "loss": 0.735, "step": 3126 }, { "epoch": 0.45, "grad_norm": 5.034853935241699, "learning_rate": 1.934518811334462e-06, "loss": 0.6774, "step": 3127 }, { "epoch": 0.45, "grad_norm": 6.347071170806885, "learning_rate": 1.9344624646829348e-06, "loss": 0.7629, "step": 3128 }, { "epoch": 0.45, "grad_norm": 5.701129913330078, "learning_rate": 1.934406094619969e-06, "loss": 0.6939, "step": 3129 }, { "epoch": 0.45, "grad_norm": 5.886855125427246, "learning_rate": 1.9343497011469768e-06, "loss": 0.7275, "step": 3130 }, { "epoch": 0.45, "grad_norm": 6.051175594329834, "learning_rate": 1.9342932842653712e-06, "loss": 0.6643, "step": 3131 }, { "epoch": 0.45, "grad_norm": 5.876243591308594, "learning_rate": 1.9342368439765654e-06, "loss": 0.7497, "step": 3132 }, { "epoch": 0.45, "grad_norm": 5.5209245681762695, "learning_rate": 1.934180380281973e-06, "loss": 0.6993, "step": 3133 }, { "epoch": 0.45, "grad_norm": 6.06431770324707, "learning_rate": 1.9341238931830095e-06, "loss": 0.7418, "step": 3134 }, { "epoch": 0.45, "grad_norm": 5.657205581665039, "learning_rate": 1.9340673826810898e-06, "loss": 0.8239, "step": 3135 }, { "epoch": 0.46, "grad_norm": 5.537092208862305, "learning_rate": 1.934010848777629e-06, "loss": 0.6673, "step": 3136 }, { "epoch": 0.46, "grad_norm": 6.4162139892578125, "learning_rate": 1.9339542914740445e-06, "loss": 0.8138, "step": 3137 }, { "epoch": 0.46, "grad_norm": 5.34918212890625, "learning_rate": 1.933897710771752e-06, "loss": 0.7438, "step": 3138 }, { "epoch": 0.46, "grad_norm": 6.1371636390686035, "learning_rate": 1.9338411066721705e-06, "loss": 0.8698, "step": 3139 }, { "epoch": 0.46, "grad_norm": 6.016498565673828, "learning_rate": 1.933784479176717e-06, "loss": 0.6832, "step": 3140 }, { "epoch": 0.46, "grad_norm": 5.205608367919922, "learning_rate": 1.933727828286811e-06, "loss": 0.7356, "step": 3141 }, { "epoch": 0.46, "grad_norm": 6.1953654289245605, "learning_rate": 1.933671154003871e-06, "loss": 0.7277, "step": 3142 }, { "epoch": 0.46, "grad_norm": 5.5943074226379395, "learning_rate": 1.9336144563293174e-06, "loss": 0.7114, "step": 3143 }, { "epoch": 0.46, "grad_norm": 5.679205417633057, "learning_rate": 1.933557735264571e-06, "loss": 0.7317, "step": 3144 }, { "epoch": 0.46, "grad_norm": 6.247664451599121, "learning_rate": 1.9335009908110516e-06, "loss": 0.7297, "step": 3145 }, { "epoch": 0.46, "grad_norm": 5.826810359954834, "learning_rate": 1.9334442229701823e-06, "loss": 0.7633, "step": 3146 }, { "epoch": 0.46, "grad_norm": 6.045828819274902, "learning_rate": 1.9333874317433843e-06, "loss": 0.7007, "step": 3147 }, { "epoch": 0.46, "grad_norm": 6.654208183288574, "learning_rate": 1.933330617132081e-06, "loss": 0.7867, "step": 3148 }, { "epoch": 0.46, "grad_norm": 5.922853469848633, "learning_rate": 1.933273779137696e-06, "loss": 0.6858, "step": 3149 }, { "epoch": 0.46, "grad_norm": 6.124772071838379, "learning_rate": 1.9332169177616523e-06, "loss": 0.7082, "step": 3150 }, { "epoch": 0.46, "grad_norm": 5.519037246704102, "learning_rate": 1.933160033005375e-06, "loss": 0.7237, "step": 3151 }, { "epoch": 0.46, "grad_norm": 6.027711868286133, "learning_rate": 1.9331031248702896e-06, "loss": 0.8289, "step": 3152 }, { "epoch": 0.46, "grad_norm": 5.673458099365234, "learning_rate": 1.933046193357821e-06, "loss": 0.6948, "step": 3153 }, { "epoch": 0.46, "grad_norm": 6.033314228057861, "learning_rate": 1.9329892384693965e-06, "loss": 0.7804, "step": 3154 }, { "epoch": 0.46, "grad_norm": 5.98630428314209, "learning_rate": 1.932932260206443e-06, "loss": 0.6998, "step": 3155 }, { "epoch": 0.46, "grad_norm": 5.798985481262207, "learning_rate": 1.932875258570387e-06, "loss": 0.7543, "step": 3156 }, { "epoch": 0.46, "grad_norm": 7.851129055023193, "learning_rate": 1.9328182335626572e-06, "loss": 0.828, "step": 3157 }, { "epoch": 0.46, "grad_norm": 6.471677780151367, "learning_rate": 1.9327611851846825e-06, "loss": 0.8028, "step": 3158 }, { "epoch": 0.46, "grad_norm": 6.171827793121338, "learning_rate": 1.9327041134378916e-06, "loss": 0.7625, "step": 3159 }, { "epoch": 0.46, "grad_norm": 6.163785457611084, "learning_rate": 1.932647018323715e-06, "loss": 0.733, "step": 3160 }, { "epoch": 0.46, "grad_norm": 6.222750663757324, "learning_rate": 1.9325898998435824e-06, "loss": 0.6555, "step": 3161 }, { "epoch": 0.46, "grad_norm": 5.841150760650635, "learning_rate": 1.932532757998925e-06, "loss": 0.689, "step": 3162 }, { "epoch": 0.46, "grad_norm": 5.358097076416016, "learning_rate": 1.932475592791175e-06, "loss": 0.6887, "step": 3163 }, { "epoch": 0.46, "grad_norm": 5.8606085777282715, "learning_rate": 1.9324184042217636e-06, "loss": 0.7521, "step": 3164 }, { "epoch": 0.46, "grad_norm": 5.67630672454834, "learning_rate": 1.9323611922921245e-06, "loss": 0.7202, "step": 3165 }, { "epoch": 0.46, "grad_norm": 6.322342872619629, "learning_rate": 1.932303957003691e-06, "loss": 0.7269, "step": 3166 }, { "epoch": 0.46, "grad_norm": 5.827653408050537, "learning_rate": 1.932246698357896e-06, "loss": 0.7556, "step": 3167 }, { "epoch": 0.46, "grad_norm": 6.393311500549316, "learning_rate": 1.932189416356175e-06, "loss": 0.7207, "step": 3168 }, { "epoch": 0.46, "grad_norm": 6.634562969207764, "learning_rate": 1.932132110999963e-06, "loss": 0.8605, "step": 3169 }, { "epoch": 0.46, "grad_norm": 5.999004364013672, "learning_rate": 1.9320747822906952e-06, "loss": 0.7686, "step": 3170 }, { "epoch": 0.46, "grad_norm": 6.173406600952148, "learning_rate": 1.9320174302298083e-06, "loss": 0.6533, "step": 3171 }, { "epoch": 0.46, "grad_norm": 6.091327667236328, "learning_rate": 1.931960054818739e-06, "loss": 0.77, "step": 3172 }, { "epoch": 0.46, "grad_norm": 6.323978424072266, "learning_rate": 1.9319026560589247e-06, "loss": 0.7911, "step": 3173 }, { "epoch": 0.46, "grad_norm": 5.750338554382324, "learning_rate": 1.931845233951804e-06, "loss": 0.6053, "step": 3174 }, { "epoch": 0.46, "grad_norm": 6.25039529800415, "learning_rate": 1.9317877884988148e-06, "loss": 0.7127, "step": 3175 }, { "epoch": 0.46, "grad_norm": 6.870731830596924, "learning_rate": 1.9317303197013967e-06, "loss": 0.7335, "step": 3176 }, { "epoch": 0.46, "grad_norm": 6.956201553344727, "learning_rate": 1.931672827560989e-06, "loss": 0.7645, "step": 3177 }, { "epoch": 0.46, "grad_norm": 6.343869686126709, "learning_rate": 1.9316153120790327e-06, "loss": 0.835, "step": 3178 }, { "epoch": 0.46, "grad_norm": 6.318094730377197, "learning_rate": 1.9315577732569685e-06, "loss": 0.7132, "step": 3179 }, { "epoch": 0.46, "grad_norm": 6.2682671546936035, "learning_rate": 1.9315002110962374e-06, "loss": 0.7588, "step": 3180 }, { "epoch": 0.46, "grad_norm": 5.909903049468994, "learning_rate": 1.9314426255982824e-06, "loss": 0.7085, "step": 3181 }, { "epoch": 0.46, "grad_norm": 5.942624092102051, "learning_rate": 1.9313850167645456e-06, "loss": 0.7633, "step": 3182 }, { "epoch": 0.46, "grad_norm": 6.35487699508667, "learning_rate": 1.931327384596471e-06, "loss": 0.7192, "step": 3183 }, { "epoch": 0.46, "grad_norm": 5.952311038970947, "learning_rate": 1.931269729095502e-06, "loss": 0.7787, "step": 3184 }, { "epoch": 0.46, "grad_norm": 5.745632171630859, "learning_rate": 1.931212050263083e-06, "loss": 0.6315, "step": 3185 }, { "epoch": 0.46, "grad_norm": 6.397357940673828, "learning_rate": 1.931154348100659e-06, "loss": 0.7364, "step": 3186 }, { "epoch": 0.46, "grad_norm": 5.508254051208496, "learning_rate": 1.931096622609676e-06, "loss": 0.6707, "step": 3187 }, { "epoch": 0.46, "grad_norm": 5.56846809387207, "learning_rate": 1.9310388737915795e-06, "loss": 0.6735, "step": 3188 }, { "epoch": 0.46, "grad_norm": 5.421341896057129, "learning_rate": 1.9309811016478173e-06, "loss": 0.7366, "step": 3189 }, { "epoch": 0.46, "grad_norm": 5.207302570343018, "learning_rate": 1.930923306179836e-06, "loss": 0.6623, "step": 3190 }, { "epoch": 0.46, "grad_norm": 5.593886852264404, "learning_rate": 1.930865487389084e-06, "loss": 0.6832, "step": 3191 }, { "epoch": 0.46, "grad_norm": 5.872745513916016, "learning_rate": 1.9308076452770097e-06, "loss": 0.7381, "step": 3192 }, { "epoch": 0.46, "grad_norm": 6.164592742919922, "learning_rate": 1.930749779845062e-06, "loss": 0.7098, "step": 3193 }, { "epoch": 0.46, "grad_norm": 6.172823905944824, "learning_rate": 1.9306918910946914e-06, "loss": 0.6843, "step": 3194 }, { "epoch": 0.46, "grad_norm": 6.339613437652588, "learning_rate": 1.930633979027347e-06, "loss": 0.7745, "step": 3195 }, { "epoch": 0.46, "grad_norm": 6.417150974273682, "learning_rate": 1.930576043644481e-06, "loss": 0.7792, "step": 3196 }, { "epoch": 0.46, "grad_norm": 5.748034477233887, "learning_rate": 1.9305180849475444e-06, "loss": 0.6211, "step": 3197 }, { "epoch": 0.46, "grad_norm": 5.239355087280273, "learning_rate": 1.9304601029379885e-06, "loss": 0.7444, "step": 3198 }, { "epoch": 0.46, "grad_norm": 5.908611297607422, "learning_rate": 1.930402097617267e-06, "loss": 0.7941, "step": 3199 }, { "epoch": 0.46, "grad_norm": 6.134199619293213, "learning_rate": 1.930344068986832e-06, "loss": 0.7393, "step": 3200 }, { "epoch": 0.46, "grad_norm": 5.24754524230957, "learning_rate": 1.930286017048139e-06, "loss": 0.6897, "step": 3201 }, { "epoch": 0.46, "grad_norm": 5.257645130157471, "learning_rate": 1.9302279418026406e-06, "loss": 0.7447, "step": 3202 }, { "epoch": 0.46, "grad_norm": 6.458157539367676, "learning_rate": 1.930169843251793e-06, "loss": 0.7965, "step": 3203 }, { "epoch": 0.46, "grad_norm": 6.056290626525879, "learning_rate": 1.930111721397051e-06, "loss": 0.7443, "step": 3204 }, { "epoch": 0.47, "grad_norm": 5.91050386428833, "learning_rate": 1.9300535762398714e-06, "loss": 0.7744, "step": 3205 }, { "epoch": 0.47, "grad_norm": 5.295782566070557, "learning_rate": 1.9299954077817103e-06, "loss": 0.7926, "step": 3206 }, { "epoch": 0.47, "grad_norm": 6.297211647033691, "learning_rate": 1.9299372160240255e-06, "loss": 0.8149, "step": 3207 }, { "epoch": 0.47, "grad_norm": 5.394230365753174, "learning_rate": 1.929879000968275e-06, "loss": 0.7459, "step": 3208 }, { "epoch": 0.47, "grad_norm": 5.688737869262695, "learning_rate": 1.929820762615917e-06, "loss": 0.7471, "step": 3209 }, { "epoch": 0.47, "grad_norm": 5.546014308929443, "learning_rate": 1.9297625009684103e-06, "loss": 0.6856, "step": 3210 }, { "epoch": 0.47, "grad_norm": 6.622730731964111, "learning_rate": 1.929704216027215e-06, "loss": 0.7026, "step": 3211 }, { "epoch": 0.47, "grad_norm": 6.137167930603027, "learning_rate": 1.929645907793791e-06, "loss": 0.7647, "step": 3212 }, { "epoch": 0.47, "grad_norm": 5.977531433105469, "learning_rate": 1.9295875762695995e-06, "loss": 0.7454, "step": 3213 }, { "epoch": 0.47, "grad_norm": 5.63460111618042, "learning_rate": 1.9295292214561018e-06, "loss": 0.777, "step": 3214 }, { "epoch": 0.47, "grad_norm": 5.79478120803833, "learning_rate": 1.9294708433547595e-06, "loss": 0.7055, "step": 3215 }, { "epoch": 0.47, "grad_norm": 5.791365623474121, "learning_rate": 1.9294124419670353e-06, "loss": 0.6955, "step": 3216 }, { "epoch": 0.47, "grad_norm": 5.8503522872924805, "learning_rate": 1.929354017294393e-06, "loss": 0.6792, "step": 3217 }, { "epoch": 0.47, "grad_norm": 5.879204273223877, "learning_rate": 1.929295569338296e-06, "loss": 0.6773, "step": 3218 }, { "epoch": 0.47, "grad_norm": 5.469989776611328, "learning_rate": 1.9292370981002076e-06, "loss": 0.6836, "step": 3219 }, { "epoch": 0.47, "grad_norm": 6.025731086730957, "learning_rate": 1.929178603581594e-06, "loss": 0.753, "step": 3220 }, { "epoch": 0.47, "grad_norm": 6.113920211791992, "learning_rate": 1.9291200857839203e-06, "loss": 0.6881, "step": 3221 }, { "epoch": 0.47, "grad_norm": 5.890515327453613, "learning_rate": 1.9290615447086525e-06, "loss": 0.7502, "step": 3222 }, { "epoch": 0.47, "grad_norm": 5.706811904907227, "learning_rate": 1.929002980357257e-06, "loss": 0.7808, "step": 3223 }, { "epoch": 0.47, "grad_norm": 5.553131580352783, "learning_rate": 1.928944392731202e-06, "loss": 0.6647, "step": 3224 }, { "epoch": 0.47, "grad_norm": 6.298159122467041, "learning_rate": 1.9288857818319543e-06, "loss": 0.7256, "step": 3225 }, { "epoch": 0.47, "grad_norm": 6.245255470275879, "learning_rate": 1.928827147660982e-06, "loss": 0.7877, "step": 3226 }, { "epoch": 0.47, "grad_norm": 5.727780818939209, "learning_rate": 1.928768490219756e-06, "loss": 0.7017, "step": 3227 }, { "epoch": 0.47, "grad_norm": 5.911287784576416, "learning_rate": 1.9287098095097433e-06, "loss": 0.6403, "step": 3228 }, { "epoch": 0.47, "grad_norm": 6.228282451629639, "learning_rate": 1.9286511055324155e-06, "loss": 0.84, "step": 3229 }, { "epoch": 0.47, "grad_norm": 5.855714797973633, "learning_rate": 1.9285923782892437e-06, "loss": 0.7189, "step": 3230 }, { "epoch": 0.47, "grad_norm": 5.579704761505127, "learning_rate": 1.9285336277816983e-06, "loss": 0.7109, "step": 3231 }, { "epoch": 0.47, "grad_norm": 5.723978519439697, "learning_rate": 1.9284748540112514e-06, "loss": 0.7077, "step": 3232 }, { "epoch": 0.47, "grad_norm": 6.22641658782959, "learning_rate": 1.928416056979376e-06, "loss": 0.758, "step": 3233 }, { "epoch": 0.47, "grad_norm": 5.431269645690918, "learning_rate": 1.9283572366875446e-06, "loss": 0.8057, "step": 3234 }, { "epoch": 0.47, "grad_norm": 5.248580455780029, "learning_rate": 1.9282983931372314e-06, "loss": 0.6923, "step": 3235 }, { "epoch": 0.47, "grad_norm": 5.344054222106934, "learning_rate": 1.92823952632991e-06, "loss": 0.6885, "step": 3236 }, { "epoch": 0.47, "grad_norm": 5.829815864562988, "learning_rate": 1.9281806362670556e-06, "loss": 0.645, "step": 3237 }, { "epoch": 0.47, "grad_norm": 5.293117046356201, "learning_rate": 1.9281217229501436e-06, "loss": 0.6606, "step": 3238 }, { "epoch": 0.47, "grad_norm": 6.164968967437744, "learning_rate": 1.9280627863806495e-06, "loss": 0.782, "step": 3239 }, { "epoch": 0.47, "grad_norm": 5.839888572692871, "learning_rate": 1.9280038265600503e-06, "loss": 0.7529, "step": 3240 }, { "epoch": 0.47, "grad_norm": 5.629209041595459, "learning_rate": 1.9279448434898234e-06, "loss": 0.7298, "step": 3241 }, { "epoch": 0.47, "grad_norm": 6.430738925933838, "learning_rate": 1.927885837171446e-06, "loss": 0.7784, "step": 3242 }, { "epoch": 0.47, "grad_norm": 6.762538433074951, "learning_rate": 1.9278268076063967e-06, "loss": 0.7525, "step": 3243 }, { "epoch": 0.47, "grad_norm": 5.866162300109863, "learning_rate": 1.9277677547961543e-06, "loss": 0.6763, "step": 3244 }, { "epoch": 0.47, "grad_norm": 6.994044303894043, "learning_rate": 1.927708678742198e-06, "loss": 0.6843, "step": 3245 }, { "epoch": 0.47, "grad_norm": 5.80771541595459, "learning_rate": 1.927649579446008e-06, "loss": 0.7373, "step": 3246 }, { "epoch": 0.47, "grad_norm": 5.152246952056885, "learning_rate": 1.9275904569090655e-06, "loss": 0.7525, "step": 3247 }, { "epoch": 0.47, "grad_norm": 6.025317668914795, "learning_rate": 1.927531311132851e-06, "loss": 0.7815, "step": 3248 }, { "epoch": 0.47, "grad_norm": 6.489810466766357, "learning_rate": 1.9274721421188473e-06, "loss": 0.7759, "step": 3249 }, { "epoch": 0.47, "grad_norm": 5.433025360107422, "learning_rate": 1.927412949868535e-06, "loss": 0.7277, "step": 3250 }, { "epoch": 0.47, "grad_norm": 5.915510177612305, "learning_rate": 1.927353734383399e-06, "loss": 0.8003, "step": 3251 }, { "epoch": 0.47, "grad_norm": 5.838104724884033, "learning_rate": 1.9272944956649215e-06, "loss": 0.6986, "step": 3252 }, { "epoch": 0.47, "grad_norm": 5.421257019042969, "learning_rate": 1.9272352337145872e-06, "loss": 0.7779, "step": 3253 }, { "epoch": 0.47, "grad_norm": 5.91480827331543, "learning_rate": 1.9271759485338806e-06, "loss": 0.812, "step": 3254 }, { "epoch": 0.47, "grad_norm": 6.279361724853516, "learning_rate": 1.927116640124287e-06, "loss": 0.7462, "step": 3255 }, { "epoch": 0.47, "grad_norm": 5.610003471374512, "learning_rate": 1.927057308487293e-06, "loss": 0.7316, "step": 3256 }, { "epoch": 0.47, "grad_norm": 6.712777614593506, "learning_rate": 1.926997953624384e-06, "loss": 0.7386, "step": 3257 }, { "epoch": 0.47, "grad_norm": 6.416040897369385, "learning_rate": 1.9269385755370477e-06, "loss": 0.7936, "step": 3258 }, { "epoch": 0.47, "grad_norm": 6.293663501739502, "learning_rate": 1.9268791742267712e-06, "loss": 0.7296, "step": 3259 }, { "epoch": 0.47, "grad_norm": 6.080926418304443, "learning_rate": 1.926819749695043e-06, "loss": 0.7455, "step": 3260 }, { "epoch": 0.47, "grad_norm": 6.700211524963379, "learning_rate": 1.9267603019433522e-06, "loss": 0.7654, "step": 3261 }, { "epoch": 0.47, "grad_norm": 6.14285135269165, "learning_rate": 1.9267008309731874e-06, "loss": 0.7237, "step": 3262 }, { "epoch": 0.47, "grad_norm": 5.955174922943115, "learning_rate": 1.9266413367860393e-06, "loss": 0.6956, "step": 3263 }, { "epoch": 0.47, "grad_norm": 5.292327404022217, "learning_rate": 1.9265818193833986e-06, "loss": 0.7112, "step": 3264 }, { "epoch": 0.47, "grad_norm": 6.256394863128662, "learning_rate": 1.9265222787667552e-06, "loss": 0.7374, "step": 3265 }, { "epoch": 0.47, "grad_norm": 5.676502227783203, "learning_rate": 1.9264627149376017e-06, "loss": 0.7714, "step": 3266 }, { "epoch": 0.47, "grad_norm": 6.349547863006592, "learning_rate": 1.9264031278974305e-06, "loss": 0.7261, "step": 3267 }, { "epoch": 0.47, "grad_norm": 5.427931308746338, "learning_rate": 1.926343517647734e-06, "loss": 0.6737, "step": 3268 }, { "epoch": 0.47, "grad_norm": 6.2983903884887695, "learning_rate": 1.926283884190006e-06, "loss": 0.7723, "step": 3269 }, { "epoch": 0.47, "grad_norm": 5.528629302978516, "learning_rate": 1.9262242275257404e-06, "loss": 0.6886, "step": 3270 }, { "epoch": 0.47, "grad_norm": 5.465959548950195, "learning_rate": 1.9261645476564316e-06, "loss": 0.6867, "step": 3271 }, { "epoch": 0.47, "grad_norm": 5.924149036407471, "learning_rate": 1.9261048445835746e-06, "loss": 0.7162, "step": 3272 }, { "epoch": 0.47, "grad_norm": 5.179091453552246, "learning_rate": 1.926045118308666e-06, "loss": 0.7087, "step": 3273 }, { "epoch": 0.48, "grad_norm": 5.640568733215332, "learning_rate": 1.925985368833201e-06, "loss": 0.6904, "step": 3274 }, { "epoch": 0.48, "grad_norm": 6.140588760375977, "learning_rate": 1.925925596158678e-06, "loss": 0.72, "step": 3275 }, { "epoch": 0.48, "grad_norm": 5.390860557556152, "learning_rate": 1.925865800286593e-06, "loss": 0.6933, "step": 3276 }, { "epoch": 0.48, "grad_norm": 5.5375800132751465, "learning_rate": 1.925805981218445e-06, "loss": 0.6946, "step": 3277 }, { "epoch": 0.48, "grad_norm": 5.597754955291748, "learning_rate": 1.925746138955732e-06, "loss": 0.7291, "step": 3278 }, { "epoch": 0.48, "grad_norm": 5.537123680114746, "learning_rate": 1.925686273499954e-06, "loss": 0.7163, "step": 3279 }, { "epoch": 0.48, "grad_norm": 5.795709609985352, "learning_rate": 1.925626384852611e-06, "loss": 0.7121, "step": 3280 }, { "epoch": 0.48, "grad_norm": 5.913005352020264, "learning_rate": 1.925566473015202e-06, "loss": 0.7056, "step": 3281 }, { "epoch": 0.48, "grad_norm": 6.084397315979004, "learning_rate": 1.9255065379892294e-06, "loss": 0.8, "step": 3282 }, { "epoch": 0.48, "grad_norm": 5.347829341888428, "learning_rate": 1.925446579776194e-06, "loss": 0.6589, "step": 3283 }, { "epoch": 0.48, "grad_norm": 5.934023857116699, "learning_rate": 1.925386598377599e-06, "loss": 0.73, "step": 3284 }, { "epoch": 0.48, "grad_norm": 5.888912200927734, "learning_rate": 1.9253265937949457e-06, "loss": 0.7717, "step": 3285 }, { "epoch": 0.48, "grad_norm": 5.9470438957214355, "learning_rate": 1.925266566029738e-06, "loss": 0.7478, "step": 3286 }, { "epoch": 0.48, "grad_norm": 5.852925777435303, "learning_rate": 1.92520651508348e-06, "loss": 0.6699, "step": 3287 }, { "epoch": 0.48, "grad_norm": 6.217462062835693, "learning_rate": 1.925146440957676e-06, "loss": 0.8226, "step": 3288 }, { "epoch": 0.48, "grad_norm": 5.69259786605835, "learning_rate": 1.9250863436538316e-06, "loss": 0.702, "step": 3289 }, { "epoch": 0.48, "grad_norm": 5.8547892570495605, "learning_rate": 1.9250262231734517e-06, "loss": 0.7385, "step": 3290 }, { "epoch": 0.48, "grad_norm": 5.793663501739502, "learning_rate": 1.924966079518043e-06, "loss": 0.7243, "step": 3291 }, { "epoch": 0.48, "grad_norm": 6.428698539733887, "learning_rate": 1.9249059126891115e-06, "loss": 0.6984, "step": 3292 }, { "epoch": 0.48, "grad_norm": 5.682411193847656, "learning_rate": 1.9248457226881654e-06, "loss": 0.7288, "step": 3293 }, { "epoch": 0.48, "grad_norm": 6.2933349609375, "learning_rate": 1.9247855095167126e-06, "loss": 0.7957, "step": 3294 }, { "epoch": 0.48, "grad_norm": 5.301206588745117, "learning_rate": 1.9247252731762617e-06, "loss": 0.6974, "step": 3295 }, { "epoch": 0.48, "grad_norm": 5.412312984466553, "learning_rate": 1.9246650136683213e-06, "loss": 0.6986, "step": 3296 }, { "epoch": 0.48, "grad_norm": 6.612686634063721, "learning_rate": 1.924604730994401e-06, "loss": 0.7635, "step": 3297 }, { "epoch": 0.48, "grad_norm": 5.511231899261475, "learning_rate": 1.924544425156012e-06, "loss": 0.6923, "step": 3298 }, { "epoch": 0.48, "grad_norm": 6.1594743728637695, "learning_rate": 1.9244840961546647e-06, "loss": 0.7632, "step": 3299 }, { "epoch": 0.48, "grad_norm": 6.142796516418457, "learning_rate": 1.9244237439918705e-06, "loss": 0.7743, "step": 3300 }, { "epoch": 0.48, "grad_norm": 5.9111127853393555, "learning_rate": 1.9243633686691415e-06, "loss": 0.7205, "step": 3301 }, { "epoch": 0.48, "grad_norm": 5.56128454208374, "learning_rate": 1.92430297018799e-06, "loss": 0.7079, "step": 3302 }, { "epoch": 0.48, "grad_norm": 5.595152854919434, "learning_rate": 1.9242425485499293e-06, "loss": 0.7315, "step": 3303 }, { "epoch": 0.48, "grad_norm": 5.4870381355285645, "learning_rate": 1.9241821037564734e-06, "loss": 0.6426, "step": 3304 }, { "epoch": 0.48, "grad_norm": 7.141200542449951, "learning_rate": 1.9241216358091364e-06, "loss": 0.8016, "step": 3305 }, { "epoch": 0.48, "grad_norm": 5.900515556335449, "learning_rate": 1.9240611447094337e-06, "loss": 0.772, "step": 3306 }, { "epoch": 0.48, "grad_norm": 5.937533378601074, "learning_rate": 1.92400063045888e-06, "loss": 0.6849, "step": 3307 }, { "epoch": 0.48, "grad_norm": 5.422811985015869, "learning_rate": 1.9239400930589924e-06, "loss": 0.698, "step": 3308 }, { "epoch": 0.48, "grad_norm": 6.691211223602295, "learning_rate": 1.9238795325112867e-06, "loss": 0.7681, "step": 3309 }, { "epoch": 0.48, "grad_norm": 6.041096210479736, "learning_rate": 1.9238189488172803e-06, "loss": 0.7592, "step": 3310 }, { "epoch": 0.48, "grad_norm": 6.554324150085449, "learning_rate": 1.9237583419784917e-06, "loss": 0.7912, "step": 3311 }, { "epoch": 0.48, "grad_norm": 6.12026309967041, "learning_rate": 1.9236977119964383e-06, "loss": 0.7813, "step": 3312 }, { "epoch": 0.48, "grad_norm": 5.436239719390869, "learning_rate": 1.9236370588726397e-06, "loss": 0.6739, "step": 3313 }, { "epoch": 0.48, "grad_norm": 5.553699016571045, "learning_rate": 1.9235763826086154e-06, "loss": 0.7719, "step": 3314 }, { "epoch": 0.48, "grad_norm": 5.956609725952148, "learning_rate": 1.9235156832058855e-06, "loss": 0.7836, "step": 3315 }, { "epoch": 0.48, "grad_norm": 4.876408100128174, "learning_rate": 1.9234549606659707e-06, "loss": 0.6515, "step": 3316 }, { "epoch": 0.48, "grad_norm": 5.745640754699707, "learning_rate": 1.9233942149903925e-06, "loss": 0.6837, "step": 3317 }, { "epoch": 0.48, "grad_norm": 6.357946872711182, "learning_rate": 1.9233334461806724e-06, "loss": 0.7682, "step": 3318 }, { "epoch": 0.48, "grad_norm": 5.8117265701293945, "learning_rate": 1.923272654238333e-06, "loss": 0.6777, "step": 3319 }, { "epoch": 0.48, "grad_norm": 5.914271831512451, "learning_rate": 1.9232118391648973e-06, "loss": 0.7626, "step": 3320 }, { "epoch": 0.48, "grad_norm": 5.916546821594238, "learning_rate": 1.9231510009618895e-06, "loss": 0.7458, "step": 3321 }, { "epoch": 0.48, "grad_norm": 6.193889617919922, "learning_rate": 1.9230901396308326e-06, "loss": 0.7036, "step": 3322 }, { "epoch": 0.48, "grad_norm": 6.396361827850342, "learning_rate": 1.923029255173253e-06, "loss": 0.733, "step": 3323 }, { "epoch": 0.48, "grad_norm": 6.277126312255859, "learning_rate": 1.9229683475906743e-06, "loss": 0.7496, "step": 3324 }, { "epoch": 0.48, "grad_norm": 5.895627021789551, "learning_rate": 1.922907416884624e-06, "loss": 0.6639, "step": 3325 }, { "epoch": 0.48, "grad_norm": 5.8099517822265625, "learning_rate": 1.9228464630566277e-06, "loss": 0.8366, "step": 3326 }, { "epoch": 0.48, "grad_norm": 6.132108211517334, "learning_rate": 1.922785486108213e-06, "loss": 0.7481, "step": 3327 }, { "epoch": 0.48, "grad_norm": 5.805564880371094, "learning_rate": 1.9227244860409065e-06, "loss": 0.7935, "step": 3328 }, { "epoch": 0.48, "grad_norm": 6.225445747375488, "learning_rate": 1.922663462856238e-06, "loss": 0.7858, "step": 3329 }, { "epoch": 0.48, "grad_norm": 5.826079368591309, "learning_rate": 1.9226024165557357e-06, "loss": 0.6846, "step": 3330 }, { "epoch": 0.48, "grad_norm": 5.620533466339111, "learning_rate": 1.9225413471409285e-06, "loss": 0.7631, "step": 3331 }, { "epoch": 0.48, "grad_norm": 6.403103351593018, "learning_rate": 1.922480254613347e-06, "loss": 0.8632, "step": 3332 }, { "epoch": 0.48, "grad_norm": 5.799767017364502, "learning_rate": 1.9224191389745213e-06, "loss": 0.7277, "step": 3333 }, { "epoch": 0.48, "grad_norm": 5.949685096740723, "learning_rate": 1.922358000225983e-06, "loss": 0.7924, "step": 3334 }, { "epoch": 0.48, "grad_norm": 5.79690408706665, "learning_rate": 1.922296838369264e-06, "loss": 0.6799, "step": 3335 }, { "epoch": 0.48, "grad_norm": 6.046550750732422, "learning_rate": 1.922235653405896e-06, "loss": 0.6825, "step": 3336 }, { "epoch": 0.48, "grad_norm": 6.254810333251953, "learning_rate": 1.922174445337412e-06, "loss": 0.7589, "step": 3337 }, { "epoch": 0.48, "grad_norm": 6.071170806884766, "learning_rate": 1.9221132141653464e-06, "loss": 0.7589, "step": 3338 }, { "epoch": 0.48, "grad_norm": 6.39293098449707, "learning_rate": 1.922051959891232e-06, "loss": 0.7603, "step": 3339 }, { "epoch": 0.48, "grad_norm": 6.597862243652344, "learning_rate": 1.9219906825166035e-06, "loss": 0.7539, "step": 3340 }, { "epoch": 0.48, "grad_norm": 5.317164897918701, "learning_rate": 1.9219293820429973e-06, "loss": 0.6863, "step": 3341 }, { "epoch": 0.48, "grad_norm": 5.667217254638672, "learning_rate": 1.921868058471948e-06, "loss": 0.7427, "step": 3342 }, { "epoch": 0.49, "grad_norm": 5.256931304931641, "learning_rate": 1.9218067118049924e-06, "loss": 0.7409, "step": 3343 }, { "epoch": 0.49, "grad_norm": 5.594327926635742, "learning_rate": 1.921745342043667e-06, "loss": 0.7392, "step": 3344 }, { "epoch": 0.49, "grad_norm": 6.0819573402404785, "learning_rate": 1.9216839491895104e-06, "loss": 0.8048, "step": 3345 }, { "epoch": 0.49, "grad_norm": 5.781292915344238, "learning_rate": 1.92162253324406e-06, "loss": 0.6974, "step": 3346 }, { "epoch": 0.49, "grad_norm": 6.852031707763672, "learning_rate": 1.9215610942088542e-06, "loss": 0.7139, "step": 3347 }, { "epoch": 0.49, "grad_norm": 5.613796710968018, "learning_rate": 1.9214996320854327e-06, "loss": 0.7155, "step": 3348 }, { "epoch": 0.49, "grad_norm": 5.907839775085449, "learning_rate": 1.9214381468753353e-06, "loss": 0.7036, "step": 3349 }, { "epoch": 0.49, "grad_norm": 5.420458793640137, "learning_rate": 1.921376638580102e-06, "loss": 0.7454, "step": 3350 }, { "epoch": 0.49, "grad_norm": 6.254887104034424, "learning_rate": 1.9213151072012745e-06, "loss": 0.7256, "step": 3351 }, { "epoch": 0.49, "grad_norm": 5.386151313781738, "learning_rate": 1.921253552740394e-06, "loss": 0.7422, "step": 3352 }, { "epoch": 0.49, "grad_norm": 5.420230865478516, "learning_rate": 1.921191975199002e-06, "loss": 0.6821, "step": 3353 }, { "epoch": 0.49, "grad_norm": 5.746496200561523, "learning_rate": 1.921130374578642e-06, "loss": 0.5888, "step": 3354 }, { "epoch": 0.49, "grad_norm": 6.092179775238037, "learning_rate": 1.9210687508808576e-06, "loss": 0.7152, "step": 3355 }, { "epoch": 0.49, "grad_norm": 6.365753650665283, "learning_rate": 1.921007104107192e-06, "loss": 0.7101, "step": 3356 }, { "epoch": 0.49, "grad_norm": 5.674001693725586, "learning_rate": 1.92094543425919e-06, "loss": 0.7183, "step": 3357 }, { "epoch": 0.49, "grad_norm": 5.611743927001953, "learning_rate": 1.9208837413383963e-06, "loss": 0.6813, "step": 3358 }, { "epoch": 0.49, "grad_norm": 5.415349960327148, "learning_rate": 1.920822025346357e-06, "loss": 0.7316, "step": 3359 }, { "epoch": 0.49, "grad_norm": 5.566793918609619, "learning_rate": 1.920760286284618e-06, "loss": 0.7929, "step": 3360 }, { "epoch": 0.49, "grad_norm": 5.850174903869629, "learning_rate": 1.9206985241547256e-06, "loss": 0.708, "step": 3361 }, { "epoch": 0.49, "grad_norm": 5.745645523071289, "learning_rate": 1.920636738958228e-06, "loss": 0.7398, "step": 3362 }, { "epoch": 0.49, "grad_norm": 5.624738693237305, "learning_rate": 1.920574930696673e-06, "loss": 0.6873, "step": 3363 }, { "epoch": 0.49, "grad_norm": 5.948212146759033, "learning_rate": 1.9205130993716084e-06, "loss": 0.7303, "step": 3364 }, { "epoch": 0.49, "grad_norm": 5.752537727355957, "learning_rate": 1.920451244984584e-06, "loss": 0.6925, "step": 3365 }, { "epoch": 0.49, "grad_norm": 5.7289628982543945, "learning_rate": 1.9203893675371497e-06, "loss": 0.672, "step": 3366 }, { "epoch": 0.49, "grad_norm": 6.576335906982422, "learning_rate": 1.9203274670308547e-06, "loss": 0.7588, "step": 3367 }, { "epoch": 0.49, "grad_norm": 5.762106895446777, "learning_rate": 1.92026554346725e-06, "loss": 0.7256, "step": 3368 }, { "epoch": 0.49, "grad_norm": 6.366667747497559, "learning_rate": 1.9202035968478886e-06, "loss": 0.7416, "step": 3369 }, { "epoch": 0.49, "grad_norm": 6.3871846199035645, "learning_rate": 1.9201416271743206e-06, "loss": 0.7473, "step": 3370 }, { "epoch": 0.49, "grad_norm": 6.52690315246582, "learning_rate": 1.920079634448099e-06, "loss": 0.7458, "step": 3371 }, { "epoch": 0.49, "grad_norm": 5.832184314727783, "learning_rate": 1.9200176186707772e-06, "loss": 0.7584, "step": 3372 }, { "epoch": 0.49, "grad_norm": 6.698317527770996, "learning_rate": 1.9199555798439092e-06, "loss": 0.7587, "step": 3373 }, { "epoch": 0.49, "grad_norm": 5.714438438415527, "learning_rate": 1.9198935179690486e-06, "loss": 0.6923, "step": 3374 }, { "epoch": 0.49, "grad_norm": 6.533572196960449, "learning_rate": 1.919831433047751e-06, "loss": 0.7544, "step": 3375 }, { "epoch": 0.49, "grad_norm": 5.399411678314209, "learning_rate": 1.9197693250815708e-06, "loss": 0.7344, "step": 3376 }, { "epoch": 0.49, "grad_norm": 6.0824456214904785, "learning_rate": 1.9197071940720648e-06, "loss": 0.6021, "step": 3377 }, { "epoch": 0.49, "grad_norm": 5.256229400634766, "learning_rate": 1.9196450400207897e-06, "loss": 0.7102, "step": 3378 }, { "epoch": 0.49, "grad_norm": 5.295226097106934, "learning_rate": 1.9195828629293024e-06, "loss": 0.6725, "step": 3379 }, { "epoch": 0.49, "grad_norm": 5.930196762084961, "learning_rate": 1.91952066279916e-06, "loss": 0.7546, "step": 3380 }, { "epoch": 0.49, "grad_norm": 6.061540126800537, "learning_rate": 1.919458439631922e-06, "loss": 0.7268, "step": 3381 }, { "epoch": 0.49, "grad_norm": 5.1089301109313965, "learning_rate": 1.9193961934291465e-06, "loss": 0.676, "step": 3382 }, { "epoch": 0.49, "grad_norm": 5.921634197235107, "learning_rate": 1.9193339241923936e-06, "loss": 0.7221, "step": 3383 }, { "epoch": 0.49, "grad_norm": 5.087174415588379, "learning_rate": 1.919271631923223e-06, "loss": 0.7476, "step": 3384 }, { "epoch": 0.49, "grad_norm": 6.000561237335205, "learning_rate": 1.9192093166231953e-06, "loss": 0.7573, "step": 3385 }, { "epoch": 0.49, "grad_norm": 5.388651371002197, "learning_rate": 1.9191469782938712e-06, "loss": 0.7179, "step": 3386 }, { "epoch": 0.49, "grad_norm": 6.20790958404541, "learning_rate": 1.9190846169368134e-06, "loss": 0.6291, "step": 3387 }, { "epoch": 0.49, "grad_norm": 6.123687267303467, "learning_rate": 1.919022232553584e-06, "loss": 0.6921, "step": 3388 }, { "epoch": 0.49, "grad_norm": 6.599093914031982, "learning_rate": 1.9189598251457456e-06, "loss": 0.7674, "step": 3389 }, { "epoch": 0.49, "grad_norm": 5.528442859649658, "learning_rate": 1.918897394714862e-06, "loss": 0.7043, "step": 3390 }, { "epoch": 0.49, "grad_norm": 5.5489678382873535, "learning_rate": 1.918834941262497e-06, "loss": 0.743, "step": 3391 }, { "epoch": 0.49, "grad_norm": 5.941228866577148, "learning_rate": 1.918772464790216e-06, "loss": 0.7545, "step": 3392 }, { "epoch": 0.49, "grad_norm": 5.843550682067871, "learning_rate": 1.9187099652995833e-06, "loss": 0.7178, "step": 3393 }, { "epoch": 0.49, "grad_norm": 6.256938457489014, "learning_rate": 1.918647442792165e-06, "loss": 0.6996, "step": 3394 }, { "epoch": 0.49, "grad_norm": 5.551278591156006, "learning_rate": 1.9185848972695283e-06, "loss": 0.7198, "step": 3395 }, { "epoch": 0.49, "grad_norm": 5.547615051269531, "learning_rate": 1.918522328733239e-06, "loss": 0.7008, "step": 3396 }, { "epoch": 0.49, "grad_norm": 5.919034957885742, "learning_rate": 1.9184597371848652e-06, "loss": 0.8289, "step": 3397 }, { "epoch": 0.49, "grad_norm": 5.655450820922852, "learning_rate": 1.9183971226259756e-06, "loss": 0.741, "step": 3398 }, { "epoch": 0.49, "grad_norm": 5.441249370574951, "learning_rate": 1.9183344850581378e-06, "loss": 0.6853, "step": 3399 }, { "epoch": 0.49, "grad_norm": 6.864324569702148, "learning_rate": 1.918271824482922e-06, "loss": 0.705, "step": 3400 }, { "epoch": 0.49, "grad_norm": 6.290637969970703, "learning_rate": 1.918209140901897e-06, "loss": 0.7642, "step": 3401 }, { "epoch": 0.49, "grad_norm": 5.8647685050964355, "learning_rate": 1.918146434316635e-06, "loss": 0.7109, "step": 3402 }, { "epoch": 0.49, "grad_norm": 6.745965480804443, "learning_rate": 1.918083704728705e-06, "loss": 0.7642, "step": 3403 }, { "epoch": 0.49, "grad_norm": 5.5796799659729, "learning_rate": 1.91802095213968e-06, "loss": 0.7869, "step": 3404 }, { "epoch": 0.49, "grad_norm": 5.961258888244629, "learning_rate": 1.9179581765511313e-06, "loss": 0.7595, "step": 3405 }, { "epoch": 0.49, "grad_norm": 6.13016939163208, "learning_rate": 1.917895377964632e-06, "loss": 0.7646, "step": 3406 }, { "epoch": 0.49, "grad_norm": 5.752612113952637, "learning_rate": 1.917832556381756e-06, "loss": 0.741, "step": 3407 }, { "epoch": 0.49, "grad_norm": 5.702184677124023, "learning_rate": 1.9177697118040757e-06, "loss": 0.6972, "step": 3408 }, { "epoch": 0.49, "grad_norm": 5.242104530334473, "learning_rate": 1.917706844233167e-06, "loss": 0.705, "step": 3409 }, { "epoch": 0.49, "grad_norm": 5.44438362121582, "learning_rate": 1.917643953670605e-06, "loss": 0.779, "step": 3410 }, { "epoch": 0.49, "grad_norm": 6.429230213165283, "learning_rate": 1.917581040117964e-06, "loss": 0.6731, "step": 3411 }, { "epoch": 0.5, "grad_norm": 5.2593512535095215, "learning_rate": 1.917518103576821e-06, "loss": 0.7137, "step": 3412 }, { "epoch": 0.5, "grad_norm": 5.797011375427246, "learning_rate": 1.9174551440487533e-06, "loss": 0.7815, "step": 3413 }, { "epoch": 0.5, "grad_norm": 6.567819118499756, "learning_rate": 1.917392161535337e-06, "loss": 0.7152, "step": 3414 }, { "epoch": 0.5, "grad_norm": 6.363500118255615, "learning_rate": 1.917329156038151e-06, "loss": 0.7201, "step": 3415 }, { "epoch": 0.5, "grad_norm": 5.939727306365967, "learning_rate": 1.9172661275587735e-06, "loss": 0.7048, "step": 3416 }, { "epoch": 0.5, "grad_norm": 5.877410888671875, "learning_rate": 1.9172030760987833e-06, "loss": 0.6757, "step": 3417 }, { "epoch": 0.5, "grad_norm": 5.719203472137451, "learning_rate": 1.9171400016597607e-06, "loss": 0.697, "step": 3418 }, { "epoch": 0.5, "grad_norm": 5.255046367645264, "learning_rate": 1.9170769042432855e-06, "loss": 0.6584, "step": 3419 }, { "epoch": 0.5, "grad_norm": 5.599647045135498, "learning_rate": 1.9170137838509383e-06, "loss": 0.6673, "step": 3420 }, { "epoch": 0.5, "grad_norm": 5.77912712097168, "learning_rate": 1.916950640484301e-06, "loss": 0.7412, "step": 3421 }, { "epoch": 0.5, "grad_norm": 5.7819366455078125, "learning_rate": 1.916887474144955e-06, "loss": 0.7063, "step": 3422 }, { "epoch": 0.5, "grad_norm": 6.195981025695801, "learning_rate": 1.9168242848344837e-06, "loss": 0.8051, "step": 3423 }, { "epoch": 0.5, "grad_norm": 6.5106353759765625, "learning_rate": 1.9167610725544693e-06, "loss": 0.7194, "step": 3424 }, { "epoch": 0.5, "grad_norm": 5.8485493659973145, "learning_rate": 1.9166978373064954e-06, "loss": 0.7322, "step": 3425 }, { "epoch": 0.5, "grad_norm": 5.576022624969482, "learning_rate": 1.916634579092147e-06, "loss": 0.7598, "step": 3426 }, { "epoch": 0.5, "grad_norm": 5.943802356719971, "learning_rate": 1.9165712979130087e-06, "loss": 0.6967, "step": 3427 }, { "epoch": 0.5, "grad_norm": 6.146100044250488, "learning_rate": 1.916507993770666e-06, "loss": 0.708, "step": 3428 }, { "epoch": 0.5, "grad_norm": 5.716383934020996, "learning_rate": 1.9164446666667043e-06, "loss": 0.7734, "step": 3429 }, { "epoch": 0.5, "grad_norm": 6.077751159667969, "learning_rate": 1.9163813166027105e-06, "loss": 0.722, "step": 3430 }, { "epoch": 0.5, "grad_norm": 5.864166259765625, "learning_rate": 1.916317943580272e-06, "loss": 0.7376, "step": 3431 }, { "epoch": 0.5, "grad_norm": 5.261649131774902, "learning_rate": 1.916254547600976e-06, "loss": 0.7379, "step": 3432 }, { "epoch": 0.5, "grad_norm": 6.416403770446777, "learning_rate": 1.9161911286664116e-06, "loss": 0.781, "step": 3433 }, { "epoch": 0.5, "grad_norm": 5.916640281677246, "learning_rate": 1.9161276867781666e-06, "loss": 0.7673, "step": 3434 }, { "epoch": 0.5, "grad_norm": 6.172986030578613, "learning_rate": 1.916064221937831e-06, "loss": 0.8301, "step": 3435 }, { "epoch": 0.5, "grad_norm": 5.564651012420654, "learning_rate": 1.9160007341469952e-06, "loss": 0.7236, "step": 3436 }, { "epoch": 0.5, "grad_norm": 5.121835231781006, "learning_rate": 1.915937223407249e-06, "loss": 0.7335, "step": 3437 }, { "epoch": 0.5, "grad_norm": 5.539483070373535, "learning_rate": 1.9158736897201837e-06, "loss": 0.7865, "step": 3438 }, { "epoch": 0.5, "grad_norm": 6.046097755432129, "learning_rate": 1.9158101330873915e-06, "loss": 0.7567, "step": 3439 }, { "epoch": 0.5, "grad_norm": 5.718946933746338, "learning_rate": 1.9157465535104648e-06, "loss": 0.6883, "step": 3440 }, { "epoch": 0.5, "grad_norm": 5.699872970581055, "learning_rate": 1.9156829509909954e-06, "loss": 0.7144, "step": 3441 }, { "epoch": 0.5, "grad_norm": 6.36118745803833, "learning_rate": 1.915619325530578e-06, "loss": 0.7577, "step": 3442 }, { "epoch": 0.5, "grad_norm": 5.882039546966553, "learning_rate": 1.9155556771308064e-06, "loss": 0.7059, "step": 3443 }, { "epoch": 0.5, "grad_norm": 5.770360946655273, "learning_rate": 1.9154920057932745e-06, "loss": 0.7665, "step": 3444 }, { "epoch": 0.5, "grad_norm": 5.540988922119141, "learning_rate": 1.915428311519578e-06, "loss": 0.7243, "step": 3445 }, { "epoch": 0.5, "grad_norm": 6.194289684295654, "learning_rate": 1.9153645943113126e-06, "loss": 0.7243, "step": 3446 }, { "epoch": 0.5, "grad_norm": 5.5811238288879395, "learning_rate": 1.9153008541700745e-06, "loss": 0.7644, "step": 3447 }, { "epoch": 0.5, "grad_norm": 5.5770978927612305, "learning_rate": 1.9152370910974603e-06, "loss": 0.667, "step": 3448 }, { "epoch": 0.5, "grad_norm": 5.673558712005615, "learning_rate": 1.9151733050950684e-06, "loss": 0.7324, "step": 3449 }, { "epoch": 0.5, "grad_norm": 5.884384632110596, "learning_rate": 1.915109496164496e-06, "loss": 0.6947, "step": 3450 }, { "epoch": 0.5, "grad_norm": 5.642599582672119, "learning_rate": 1.915045664307343e-06, "loss": 0.7538, "step": 3451 }, { "epoch": 0.5, "grad_norm": 5.8913750648498535, "learning_rate": 1.914981809525207e-06, "loss": 0.781, "step": 3452 }, { "epoch": 0.5, "grad_norm": 5.317554473876953, "learning_rate": 1.914917931819688e-06, "loss": 0.7227, "step": 3453 }, { "epoch": 0.5, "grad_norm": 5.990400314331055, "learning_rate": 1.9148540311923873e-06, "loss": 0.717, "step": 3454 }, { "epoch": 0.5, "grad_norm": 6.005760669708252, "learning_rate": 1.914790107644905e-06, "loss": 0.7242, "step": 3455 }, { "epoch": 0.5, "grad_norm": 5.592568397521973, "learning_rate": 1.914726161178844e-06, "loss": 0.7086, "step": 3456 }, { "epoch": 0.5, "grad_norm": 5.887679100036621, "learning_rate": 1.914662191795804e-06, "loss": 0.6714, "step": 3457 }, { "epoch": 0.5, "grad_norm": 6.810563087463379, "learning_rate": 1.91459819949739e-06, "loss": 0.7388, "step": 3458 }, { "epoch": 0.5, "grad_norm": 5.72318696975708, "learning_rate": 1.9145341842852036e-06, "loss": 0.6979, "step": 3459 }, { "epoch": 0.5, "grad_norm": 6.0525312423706055, "learning_rate": 1.914470146160849e-06, "loss": 0.7319, "step": 3460 }, { "epoch": 0.5, "grad_norm": 6.265303611755371, "learning_rate": 1.914406085125931e-06, "loss": 0.6948, "step": 3461 }, { "epoch": 0.5, "grad_norm": 6.036438465118408, "learning_rate": 1.9143420011820543e-06, "loss": 0.7123, "step": 3462 }, { "epoch": 0.5, "grad_norm": 6.0735321044921875, "learning_rate": 1.914277894330824e-06, "loss": 0.7761, "step": 3463 }, { "epoch": 0.5, "grad_norm": 5.251015663146973, "learning_rate": 1.9142137645738472e-06, "loss": 0.7457, "step": 3464 }, { "epoch": 0.5, "grad_norm": 5.58053731918335, "learning_rate": 1.9141496119127298e-06, "loss": 0.7818, "step": 3465 }, { "epoch": 0.5, "grad_norm": 5.832013130187988, "learning_rate": 1.9140854363490793e-06, "loss": 0.6647, "step": 3466 }, { "epoch": 0.5, "grad_norm": 6.687292575836182, "learning_rate": 1.9140212378845032e-06, "loss": 0.6933, "step": 3467 }, { "epoch": 0.5, "grad_norm": 6.311882019042969, "learning_rate": 1.9139570165206103e-06, "loss": 0.7824, "step": 3468 }, { "epoch": 0.5, "grad_norm": 5.834561824798584, "learning_rate": 1.913892772259009e-06, "loss": 0.73, "step": 3469 }, { "epoch": 0.5, "grad_norm": 5.674725532531738, "learning_rate": 1.9138285051013095e-06, "loss": 0.6483, "step": 3470 }, { "epoch": 0.5, "grad_norm": 5.244794845581055, "learning_rate": 1.913764215049122e-06, "loss": 0.7218, "step": 3471 }, { "epoch": 0.5, "grad_norm": 6.499538898468018, "learning_rate": 1.913699902104056e-06, "loss": 0.7466, "step": 3472 }, { "epoch": 0.5, "grad_norm": 4.919773578643799, "learning_rate": 1.9136355662677245e-06, "loss": 0.6814, "step": 3473 }, { "epoch": 0.5, "grad_norm": 5.236969470977783, "learning_rate": 1.9135712075417377e-06, "loss": 0.6816, "step": 3474 }, { "epoch": 0.5, "grad_norm": 5.029886722564697, "learning_rate": 1.9135068259277087e-06, "loss": 0.7196, "step": 3475 }, { "epoch": 0.5, "grad_norm": 5.391006946563721, "learning_rate": 1.913442421427251e-06, "loss": 0.6755, "step": 3476 }, { "epoch": 0.5, "grad_norm": 5.464422225952148, "learning_rate": 1.913377994041977e-06, "loss": 0.7437, "step": 3477 }, { "epoch": 0.5, "grad_norm": 5.945383071899414, "learning_rate": 1.9133135437735017e-06, "loss": 0.7043, "step": 3478 }, { "epoch": 0.5, "grad_norm": 6.704283237457275, "learning_rate": 1.9132490706234396e-06, "loss": 0.8273, "step": 3479 }, { "epoch": 0.5, "grad_norm": 5.306393146514893, "learning_rate": 1.9131845745934058e-06, "loss": 0.7867, "step": 3480 }, { "epoch": 0.51, "grad_norm": 5.480658531188965, "learning_rate": 1.9131200556850163e-06, "loss": 0.6925, "step": 3481 }, { "epoch": 0.51, "grad_norm": 6.53851842880249, "learning_rate": 1.913055513899887e-06, "loss": 0.7027, "step": 3482 }, { "epoch": 0.51, "grad_norm": 5.23018741607666, "learning_rate": 1.912990949239636e-06, "loss": 0.6676, "step": 3483 }, { "epoch": 0.51, "grad_norm": 6.058534145355225, "learning_rate": 1.91292636170588e-06, "loss": 0.6599, "step": 3484 }, { "epoch": 0.51, "grad_norm": 6.069147109985352, "learning_rate": 1.9128617513002374e-06, "loss": 0.6765, "step": 3485 }, { "epoch": 0.51, "grad_norm": 5.789342403411865, "learning_rate": 1.912797118024327e-06, "loss": 0.7522, "step": 3486 }, { "epoch": 0.51, "grad_norm": 5.214632511138916, "learning_rate": 1.9127324618797676e-06, "loss": 0.7221, "step": 3487 }, { "epoch": 0.51, "grad_norm": 5.861779689788818, "learning_rate": 1.9126677828681797e-06, "loss": 0.6585, "step": 3488 }, { "epoch": 0.51, "grad_norm": 5.61206579208374, "learning_rate": 1.9126030809911834e-06, "loss": 0.7431, "step": 3489 }, { "epoch": 0.51, "grad_norm": 6.540902614593506, "learning_rate": 1.9125383562504e-06, "loss": 0.7763, "step": 3490 }, { "epoch": 0.51, "grad_norm": 5.97964334487915, "learning_rate": 1.9124736086474503e-06, "loss": 0.7481, "step": 3491 }, { "epoch": 0.51, "grad_norm": 7.257835388183594, "learning_rate": 1.912408838183957e-06, "loss": 0.8121, "step": 3492 }, { "epoch": 0.51, "grad_norm": 5.990832328796387, "learning_rate": 1.912344044861543e-06, "loss": 0.7999, "step": 3493 }, { "epoch": 0.51, "grad_norm": 5.997809886932373, "learning_rate": 1.912279228681831e-06, "loss": 0.7661, "step": 3494 }, { "epoch": 0.51, "grad_norm": 5.174936294555664, "learning_rate": 1.9122143896464455e-06, "loss": 0.7017, "step": 3495 }, { "epoch": 0.51, "grad_norm": 6.330695629119873, "learning_rate": 1.9121495277570108e-06, "loss": 0.7337, "step": 3496 }, { "epoch": 0.51, "grad_norm": 6.717465877532959, "learning_rate": 1.9120846430151515e-06, "loss": 0.7315, "step": 3497 }, { "epoch": 0.51, "grad_norm": 6.209592819213867, "learning_rate": 1.9120197354224935e-06, "loss": 0.7248, "step": 3498 }, { "epoch": 0.51, "grad_norm": 7.279424667358398, "learning_rate": 1.911954804980663e-06, "loss": 0.7852, "step": 3499 }, { "epoch": 0.51, "grad_norm": 5.294290542602539, "learning_rate": 1.911889851691287e-06, "loss": 0.7486, "step": 3500 }, { "epoch": 0.51, "grad_norm": 6.2176127433776855, "learning_rate": 1.9118248755559918e-06, "loss": 0.8203, "step": 3501 }, { "epoch": 0.51, "grad_norm": 6.108304023742676, "learning_rate": 1.911759876576406e-06, "loss": 0.7694, "step": 3502 }, { "epoch": 0.51, "grad_norm": 6.072579860687256, "learning_rate": 1.911694854754158e-06, "loss": 0.8683, "step": 3503 }, { "epoch": 0.51, "grad_norm": 6.068160533905029, "learning_rate": 1.911629810090877e-06, "loss": 0.7592, "step": 3504 }, { "epoch": 0.51, "grad_norm": 5.512768745422363, "learning_rate": 1.911564742588192e-06, "loss": 0.6587, "step": 3505 }, { "epoch": 0.51, "grad_norm": 5.863519191741943, "learning_rate": 1.9114996522477337e-06, "loss": 0.6968, "step": 3506 }, { "epoch": 0.51, "grad_norm": 6.348076820373535, "learning_rate": 1.9114345390711326e-06, "loss": 0.8192, "step": 3507 }, { "epoch": 0.51, "grad_norm": 5.8047966957092285, "learning_rate": 1.91136940306002e-06, "loss": 0.7575, "step": 3508 }, { "epoch": 0.51, "grad_norm": 5.758545875549316, "learning_rate": 1.911304244216028e-06, "loss": 0.7341, "step": 3509 }, { "epoch": 0.51, "grad_norm": 5.602414608001709, "learning_rate": 1.9112390625407887e-06, "loss": 0.6672, "step": 3510 }, { "epoch": 0.51, "grad_norm": 5.604131698608398, "learning_rate": 1.9111738580359352e-06, "loss": 0.6699, "step": 3511 }, { "epoch": 0.51, "grad_norm": 5.933439254760742, "learning_rate": 1.9111086307031017e-06, "loss": 0.7506, "step": 3512 }, { "epoch": 0.51, "grad_norm": 6.232447147369385, "learning_rate": 1.9110433805439216e-06, "loss": 0.7216, "step": 3513 }, { "epoch": 0.51, "grad_norm": 9.43042278289795, "learning_rate": 1.9109781075600297e-06, "loss": 0.8017, "step": 3514 }, { "epoch": 0.51, "grad_norm": 5.312131404876709, "learning_rate": 1.9109128117530616e-06, "loss": 0.7855, "step": 3515 }, { "epoch": 0.51, "grad_norm": 6.221879959106445, "learning_rate": 1.910847493124653e-06, "loss": 0.8261, "step": 3516 }, { "epoch": 0.51, "grad_norm": 5.616311550140381, "learning_rate": 1.9107821516764407e-06, "loss": 0.7762, "step": 3517 }, { "epoch": 0.51, "grad_norm": 5.897691249847412, "learning_rate": 1.910716787410061e-06, "loss": 0.714, "step": 3518 }, { "epoch": 0.51, "grad_norm": 6.079854965209961, "learning_rate": 1.910651400327152e-06, "loss": 0.6667, "step": 3519 }, { "epoch": 0.51, "grad_norm": 6.56135892868042, "learning_rate": 1.9105859904293523e-06, "loss": 0.7156, "step": 3520 }, { "epoch": 0.51, "grad_norm": 5.585108280181885, "learning_rate": 1.9105205577182997e-06, "loss": 0.6995, "step": 3521 }, { "epoch": 0.51, "grad_norm": 5.698014259338379, "learning_rate": 1.9104551021956343e-06, "loss": 0.6669, "step": 3522 }, { "epoch": 0.51, "grad_norm": 5.877703666687012, "learning_rate": 1.910389623862996e-06, "loss": 0.7434, "step": 3523 }, { "epoch": 0.51, "grad_norm": 5.944859981536865, "learning_rate": 1.9103241227220242e-06, "loss": 0.6677, "step": 3524 }, { "epoch": 0.51, "grad_norm": 6.076114654541016, "learning_rate": 1.910258598774361e-06, "loss": 0.7399, "step": 3525 }, { "epoch": 0.51, "grad_norm": 5.830374240875244, "learning_rate": 1.9101930520216473e-06, "loss": 0.7567, "step": 3526 }, { "epoch": 0.51, "grad_norm": 5.849496364593506, "learning_rate": 1.9101274824655256e-06, "loss": 0.7338, "step": 3527 }, { "epoch": 0.51, "grad_norm": 6.033260822296143, "learning_rate": 1.910061890107639e-06, "loss": 0.7814, "step": 3528 }, { "epoch": 0.51, "grad_norm": 6.401756763458252, "learning_rate": 1.90999627494963e-06, "loss": 0.7051, "step": 3529 }, { "epoch": 0.51, "grad_norm": 5.664691925048828, "learning_rate": 1.909930636993143e-06, "loss": 0.655, "step": 3530 }, { "epoch": 0.51, "grad_norm": 6.36619234085083, "learning_rate": 1.9098649762398227e-06, "loss": 0.6737, "step": 3531 }, { "epoch": 0.51, "grad_norm": 5.873266220092773, "learning_rate": 1.909799292691314e-06, "loss": 0.6836, "step": 3532 }, { "epoch": 0.51, "grad_norm": 5.542755126953125, "learning_rate": 1.9097335863492615e-06, "loss": 0.6365, "step": 3533 }, { "epoch": 0.51, "grad_norm": 5.927022457122803, "learning_rate": 1.9096678572153127e-06, "loss": 0.7423, "step": 3534 }, { "epoch": 0.51, "grad_norm": 5.6310343742370605, "learning_rate": 1.9096021052911136e-06, "loss": 0.8417, "step": 3535 }, { "epoch": 0.51, "grad_norm": 6.478166580200195, "learning_rate": 1.9095363305783117e-06, "loss": 0.734, "step": 3536 }, { "epoch": 0.51, "grad_norm": 5.641985893249512, "learning_rate": 1.909470533078555e-06, "loss": 0.7623, "step": 3537 }, { "epoch": 0.51, "grad_norm": 6.176705360412598, "learning_rate": 1.9094047127934916e-06, "loss": 0.7079, "step": 3538 }, { "epoch": 0.51, "grad_norm": 5.733252048492432, "learning_rate": 1.909338869724771e-06, "loss": 0.7868, "step": 3539 }, { "epoch": 0.51, "grad_norm": 5.919919490814209, "learning_rate": 1.9092730038740424e-06, "loss": 0.7852, "step": 3540 }, { "epoch": 0.51, "grad_norm": 6.2333245277404785, "learning_rate": 1.909207115242956e-06, "loss": 0.7652, "step": 3541 }, { "epoch": 0.51, "grad_norm": 5.968531608581543, "learning_rate": 1.909141203833163e-06, "loss": 0.7013, "step": 3542 }, { "epoch": 0.51, "grad_norm": 5.628528594970703, "learning_rate": 1.909075269646314e-06, "loss": 0.7302, "step": 3543 }, { "epoch": 0.51, "grad_norm": 5.425911903381348, "learning_rate": 1.9090093126840614e-06, "loss": 0.757, "step": 3544 }, { "epoch": 0.51, "grad_norm": 7.11111307144165, "learning_rate": 1.9089433329480574e-06, "loss": 0.6995, "step": 3545 }, { "epoch": 0.51, "grad_norm": 5.787323474884033, "learning_rate": 1.908877330439955e-06, "loss": 0.7162, "step": 3546 }, { "epoch": 0.51, "grad_norm": 5.2426862716674805, "learning_rate": 1.908811305161408e-06, "loss": 0.6989, "step": 3547 }, { "epoch": 0.51, "grad_norm": 5.95505428314209, "learning_rate": 1.90874525711407e-06, "loss": 0.7093, "step": 3548 }, { "epoch": 0.51, "grad_norm": 5.847303867340088, "learning_rate": 1.908679186299597e-06, "loss": 0.728, "step": 3549 }, { "epoch": 0.52, "grad_norm": 5.316985607147217, "learning_rate": 1.9086130927196425e-06, "loss": 0.7168, "step": 3550 }, { "epoch": 0.52, "grad_norm": 6.293748378753662, "learning_rate": 1.908546976375864e-06, "loss": 0.7588, "step": 3551 }, { "epoch": 0.52, "grad_norm": 6.494590759277344, "learning_rate": 1.9084808372699166e-06, "loss": 0.8477, "step": 3552 }, { "epoch": 0.52, "grad_norm": 6.336389541625977, "learning_rate": 1.9084146754034585e-06, "loss": 0.7198, "step": 3553 }, { "epoch": 0.52, "grad_norm": 5.574106693267822, "learning_rate": 1.9083484907781462e-06, "loss": 0.7303, "step": 3554 }, { "epoch": 0.52, "grad_norm": 5.5341596603393555, "learning_rate": 1.9082822833956387e-06, "loss": 0.7423, "step": 3555 }, { "epoch": 0.52, "grad_norm": 5.567044258117676, "learning_rate": 1.9082160532575945e-06, "loss": 0.7411, "step": 3556 }, { "epoch": 0.52, "grad_norm": 6.244828224182129, "learning_rate": 1.908149800365673e-06, "loss": 0.7737, "step": 3557 }, { "epoch": 0.52, "grad_norm": 5.3224053382873535, "learning_rate": 1.908083524721533e-06, "loss": 0.6752, "step": 3558 }, { "epoch": 0.52, "grad_norm": 7.1221795082092285, "learning_rate": 1.9080172263268368e-06, "loss": 0.6693, "step": 3559 }, { "epoch": 0.52, "grad_norm": 5.737471580505371, "learning_rate": 1.9079509051832436e-06, "loss": 0.6079, "step": 3560 }, { "epoch": 0.52, "grad_norm": 5.723754405975342, "learning_rate": 1.907884561292416e-06, "loss": 0.7568, "step": 3561 }, { "epoch": 0.52, "grad_norm": 6.847891330718994, "learning_rate": 1.907818194656016e-06, "loss": 0.8528, "step": 3562 }, { "epoch": 0.52, "grad_norm": 5.1971330642700195, "learning_rate": 1.9077518052757056e-06, "loss": 0.7442, "step": 3563 }, { "epoch": 0.52, "grad_norm": 6.207911968231201, "learning_rate": 1.9076853931531493e-06, "loss": 0.7991, "step": 3564 }, { "epoch": 0.52, "grad_norm": 5.528689384460449, "learning_rate": 1.9076189582900102e-06, "loss": 0.7386, "step": 3565 }, { "epoch": 0.52, "grad_norm": 5.465053081512451, "learning_rate": 1.9075525006879528e-06, "loss": 0.7578, "step": 3566 }, { "epoch": 0.52, "grad_norm": 6.172939777374268, "learning_rate": 1.9074860203486418e-06, "loss": 0.8161, "step": 3567 }, { "epoch": 0.52, "grad_norm": 6.392536640167236, "learning_rate": 1.9074195172737434e-06, "loss": 0.7142, "step": 3568 }, { "epoch": 0.52, "grad_norm": 6.2166337966918945, "learning_rate": 1.9073529914649232e-06, "loss": 0.7137, "step": 3569 }, { "epoch": 0.52, "grad_norm": 5.840753555297852, "learning_rate": 1.9072864429238481e-06, "loss": 0.8173, "step": 3570 }, { "epoch": 0.52, "grad_norm": 5.700518608093262, "learning_rate": 1.9072198716521856e-06, "loss": 0.6816, "step": 3571 }, { "epoch": 0.52, "grad_norm": 5.626283645629883, "learning_rate": 1.9071532776516031e-06, "loss": 0.6942, "step": 3572 }, { "epoch": 0.52, "grad_norm": 6.195619583129883, "learning_rate": 1.9070866609237695e-06, "loss": 0.6909, "step": 3573 }, { "epoch": 0.52, "grad_norm": 7.0805158615112305, "learning_rate": 1.9070200214703532e-06, "loss": 0.7616, "step": 3574 }, { "epoch": 0.52, "grad_norm": 5.846312999725342, "learning_rate": 1.9069533592930239e-06, "loss": 0.7395, "step": 3575 }, { "epoch": 0.52, "grad_norm": 5.461990833282471, "learning_rate": 1.906886674393452e-06, "loss": 0.7628, "step": 3576 }, { "epoch": 0.52, "grad_norm": 6.194868087768555, "learning_rate": 1.906819966773308e-06, "loss": 0.7136, "step": 3577 }, { "epoch": 0.52, "grad_norm": 5.73901891708374, "learning_rate": 1.9067532364342629e-06, "loss": 0.7271, "step": 3578 }, { "epoch": 0.52, "grad_norm": 5.740878105163574, "learning_rate": 1.9066864833779892e-06, "loss": 0.6794, "step": 3579 }, { "epoch": 0.52, "grad_norm": 5.404134273529053, "learning_rate": 1.9066197076061585e-06, "loss": 0.7342, "step": 3580 }, { "epoch": 0.52, "grad_norm": 5.6450934410095215, "learning_rate": 1.9065529091204443e-06, "loss": 0.6947, "step": 3581 }, { "epoch": 0.52, "grad_norm": 5.640632629394531, "learning_rate": 1.90648608792252e-06, "loss": 0.7208, "step": 3582 }, { "epoch": 0.52, "grad_norm": 6.198341369628906, "learning_rate": 1.9064192440140597e-06, "loss": 0.8341, "step": 3583 }, { "epoch": 0.52, "grad_norm": 5.7831573486328125, "learning_rate": 1.9063523773967378e-06, "loss": 0.7754, "step": 3584 }, { "epoch": 0.52, "grad_norm": 5.290097713470459, "learning_rate": 1.90628548807223e-06, "loss": 0.7045, "step": 3585 }, { "epoch": 0.52, "grad_norm": 5.455639839172363, "learning_rate": 1.9062185760422116e-06, "loss": 0.6394, "step": 3586 }, { "epoch": 0.52, "grad_norm": 5.7096405029296875, "learning_rate": 1.9061516413083597e-06, "loss": 0.6526, "step": 3587 }, { "epoch": 0.52, "grad_norm": 5.89119291305542, "learning_rate": 1.9060846838723502e-06, "loss": 0.7193, "step": 3588 }, { "epoch": 0.52, "grad_norm": 5.690109729766846, "learning_rate": 1.9060177037358614e-06, "loss": 0.7506, "step": 3589 }, { "epoch": 0.52, "grad_norm": 7.193357944488525, "learning_rate": 1.905950700900571e-06, "loss": 0.7911, "step": 3590 }, { "epoch": 0.52, "grad_norm": 5.746833324432373, "learning_rate": 1.9058836753681582e-06, "loss": 0.7173, "step": 3591 }, { "epoch": 0.52, "grad_norm": 5.762775897979736, "learning_rate": 1.9058166271403016e-06, "loss": 0.7564, "step": 3592 }, { "epoch": 0.52, "grad_norm": 5.951423645019531, "learning_rate": 1.9057495562186813e-06, "loss": 0.7665, "step": 3593 }, { "epoch": 0.52, "grad_norm": 6.301435470581055, "learning_rate": 1.9056824626049775e-06, "loss": 0.8752, "step": 3594 }, { "epoch": 0.52, "grad_norm": 5.718709468841553, "learning_rate": 1.905615346300871e-06, "loss": 0.7364, "step": 3595 }, { "epoch": 0.52, "grad_norm": 5.678823471069336, "learning_rate": 1.9055482073080435e-06, "loss": 0.6739, "step": 3596 }, { "epoch": 0.52, "grad_norm": 5.6260666847229, "learning_rate": 1.9054810456281772e-06, "loss": 0.7029, "step": 3597 }, { "epoch": 0.52, "grad_norm": 6.227561950683594, "learning_rate": 1.9054138612629544e-06, "loss": 0.7292, "step": 3598 }, { "epoch": 0.52, "grad_norm": 5.625819206237793, "learning_rate": 1.9053466542140586e-06, "loss": 0.7931, "step": 3599 }, { "epoch": 0.52, "grad_norm": 5.698256015777588, "learning_rate": 1.9052794244831733e-06, "loss": 0.7357, "step": 3600 }, { "epoch": 0.52, "grad_norm": 6.053153038024902, "learning_rate": 1.9052121720719829e-06, "loss": 0.7433, "step": 3601 }, { "epoch": 0.52, "grad_norm": 5.989288806915283, "learning_rate": 1.9051448969821726e-06, "loss": 0.7781, "step": 3602 }, { "epoch": 0.52, "grad_norm": 5.852776050567627, "learning_rate": 1.9050775992154273e-06, "loss": 0.7035, "step": 3603 }, { "epoch": 0.52, "grad_norm": 5.635508060455322, "learning_rate": 1.9050102787734334e-06, "loss": 0.7992, "step": 3604 }, { "epoch": 0.52, "grad_norm": 5.245497226715088, "learning_rate": 1.904942935657878e-06, "loss": 0.6578, "step": 3605 }, { "epoch": 0.52, "grad_norm": 6.416044235229492, "learning_rate": 1.9048755698704468e-06, "loss": 0.7966, "step": 3606 }, { "epoch": 0.52, "grad_norm": 6.449345111846924, "learning_rate": 1.9048081814128291e-06, "loss": 0.7564, "step": 3607 }, { "epoch": 0.52, "grad_norm": 5.462984561920166, "learning_rate": 1.9047407702867124e-06, "loss": 0.7023, "step": 3608 }, { "epoch": 0.52, "grad_norm": 5.8797078132629395, "learning_rate": 1.904673336493786e-06, "loss": 0.7467, "step": 3609 }, { "epoch": 0.52, "grad_norm": 6.033429145812988, "learning_rate": 1.9046058800357385e-06, "loss": 0.7284, "step": 3610 }, { "epoch": 0.52, "grad_norm": 6.008631229400635, "learning_rate": 1.9045384009142608e-06, "loss": 0.7425, "step": 3611 }, { "epoch": 0.52, "grad_norm": 6.176610469818115, "learning_rate": 1.9044708991310433e-06, "loss": 0.8012, "step": 3612 }, { "epoch": 0.52, "grad_norm": 5.821704864501953, "learning_rate": 1.904403374687777e-06, "loss": 0.7003, "step": 3613 }, { "epoch": 0.52, "grad_norm": 5.339922904968262, "learning_rate": 1.9043358275861538e-06, "loss": 0.722, "step": 3614 }, { "epoch": 0.52, "grad_norm": 5.433483123779297, "learning_rate": 1.9042682578278657e-06, "loss": 0.7536, "step": 3615 }, { "epoch": 0.52, "grad_norm": 5.922638893127441, "learning_rate": 1.9042006654146054e-06, "loss": 0.7053, "step": 3616 }, { "epoch": 0.52, "grad_norm": 5.908493995666504, "learning_rate": 1.9041330503480667e-06, "loss": 0.776, "step": 3617 }, { "epoch": 0.52, "grad_norm": 5.729975700378418, "learning_rate": 1.904065412629944e-06, "loss": 0.6756, "step": 3618 }, { "epoch": 0.53, "grad_norm": 6.130620956420898, "learning_rate": 1.9039977522619308e-06, "loss": 0.7425, "step": 3619 }, { "epoch": 0.53, "grad_norm": 5.965631008148193, "learning_rate": 1.903930069245723e-06, "loss": 0.6582, "step": 3620 }, { "epoch": 0.53, "grad_norm": 5.315625190734863, "learning_rate": 1.9038623635830158e-06, "loss": 0.7749, "step": 3621 }, { "epoch": 0.53, "grad_norm": 5.713496685028076, "learning_rate": 1.903794635275506e-06, "loss": 0.7226, "step": 3622 }, { "epoch": 0.53, "grad_norm": 5.471790790557861, "learning_rate": 1.9037268843248899e-06, "loss": 0.6497, "step": 3623 }, { "epoch": 0.53, "grad_norm": 6.419527530670166, "learning_rate": 1.9036591107328652e-06, "loss": 0.8488, "step": 3624 }, { "epoch": 0.53, "grad_norm": 5.968979358673096, "learning_rate": 1.90359131450113e-06, "loss": 0.7697, "step": 3625 }, { "epoch": 0.53, "grad_norm": 5.779501438140869, "learning_rate": 1.9035234956313824e-06, "loss": 0.7937, "step": 3626 }, { "epoch": 0.53, "grad_norm": 5.250298500061035, "learning_rate": 1.9034556541253214e-06, "loss": 0.68, "step": 3627 }, { "epoch": 0.53, "grad_norm": 6.247089862823486, "learning_rate": 1.9033877899846473e-06, "loss": 0.7228, "step": 3628 }, { "epoch": 0.53, "grad_norm": 5.939414978027344, "learning_rate": 1.9033199032110598e-06, "loss": 0.6711, "step": 3629 }, { "epoch": 0.53, "grad_norm": 5.2174201011657715, "learning_rate": 1.9032519938062598e-06, "loss": 0.7923, "step": 3630 }, { "epoch": 0.53, "grad_norm": 6.125207901000977, "learning_rate": 1.9031840617719493e-06, "loss": 0.6977, "step": 3631 }, { "epoch": 0.53, "grad_norm": 5.989702224731445, "learning_rate": 1.9031161071098292e-06, "loss": 0.8056, "step": 3632 }, { "epoch": 0.53, "grad_norm": 5.9288835525512695, "learning_rate": 1.9030481298216023e-06, "loss": 0.7474, "step": 3633 }, { "epoch": 0.53, "grad_norm": 5.488428115844727, "learning_rate": 1.902980129908972e-06, "loss": 0.7618, "step": 3634 }, { "epoch": 0.53, "grad_norm": 5.554658889770508, "learning_rate": 1.9029121073736417e-06, "loss": 0.71, "step": 3635 }, { "epoch": 0.53, "grad_norm": 6.396990776062012, "learning_rate": 1.9028440622173156e-06, "loss": 0.6562, "step": 3636 }, { "epoch": 0.53, "grad_norm": 5.605077743530273, "learning_rate": 1.9027759944416986e-06, "loss": 0.7213, "step": 3637 }, { "epoch": 0.53, "grad_norm": 6.792370319366455, "learning_rate": 1.902707904048496e-06, "loss": 0.793, "step": 3638 }, { "epoch": 0.53, "grad_norm": 6.244933128356934, "learning_rate": 1.9026397910394131e-06, "loss": 0.7676, "step": 3639 }, { "epoch": 0.53, "grad_norm": 6.042548179626465, "learning_rate": 1.9025716554161572e-06, "loss": 0.7194, "step": 3640 }, { "epoch": 0.53, "grad_norm": 5.736743450164795, "learning_rate": 1.9025034971804353e-06, "loss": 0.6949, "step": 3641 }, { "epoch": 0.53, "grad_norm": 5.652421951293945, "learning_rate": 1.9024353163339544e-06, "loss": 0.6968, "step": 3642 }, { "epoch": 0.53, "grad_norm": 5.735596179962158, "learning_rate": 1.9023671128784228e-06, "loss": 0.6851, "step": 3643 }, { "epoch": 0.53, "grad_norm": 6.4210028648376465, "learning_rate": 1.9022988868155495e-06, "loss": 0.7943, "step": 3644 }, { "epoch": 0.53, "grad_norm": 6.2831292152404785, "learning_rate": 1.9022306381470438e-06, "loss": 0.6981, "step": 3645 }, { "epoch": 0.53, "grad_norm": 6.201391696929932, "learning_rate": 1.9021623668746153e-06, "loss": 0.8134, "step": 3646 }, { "epoch": 0.53, "grad_norm": 5.469473361968994, "learning_rate": 1.9020940729999748e-06, "loss": 0.6538, "step": 3647 }, { "epoch": 0.53, "grad_norm": 6.0893449783325195, "learning_rate": 1.9020257565248328e-06, "loss": 0.6823, "step": 3648 }, { "epoch": 0.53, "grad_norm": 5.760097503662109, "learning_rate": 1.9019574174509012e-06, "loss": 0.7045, "step": 3649 }, { "epoch": 0.53, "grad_norm": 5.796894550323486, "learning_rate": 1.901889055779892e-06, "loss": 0.711, "step": 3650 }, { "epoch": 0.53, "grad_norm": 5.811110496520996, "learning_rate": 1.9018206715135178e-06, "loss": 0.7576, "step": 3651 }, { "epoch": 0.53, "grad_norm": 5.500098705291748, "learning_rate": 1.9017522646534923e-06, "loss": 0.6744, "step": 3652 }, { "epoch": 0.53, "grad_norm": 5.212342262268066, "learning_rate": 1.9016838352015287e-06, "loss": 0.7268, "step": 3653 }, { "epoch": 0.53, "grad_norm": 7.053094863891602, "learning_rate": 1.9016153831593419e-06, "loss": 0.7752, "step": 3654 }, { "epoch": 0.53, "grad_norm": 5.488373279571533, "learning_rate": 1.9015469085286466e-06, "loss": 0.7289, "step": 3655 }, { "epoch": 0.53, "grad_norm": 5.74357795715332, "learning_rate": 1.9014784113111583e-06, "loss": 0.7733, "step": 3656 }, { "epoch": 0.53, "grad_norm": 6.140350341796875, "learning_rate": 1.901409891508593e-06, "loss": 0.7669, "step": 3657 }, { "epoch": 0.53, "grad_norm": 5.514066219329834, "learning_rate": 1.9013413491226676e-06, "loss": 0.6595, "step": 3658 }, { "epoch": 0.53, "grad_norm": 5.7539167404174805, "learning_rate": 1.9012727841550994e-06, "loss": 0.7274, "step": 3659 }, { "epoch": 0.53, "grad_norm": 6.713963508605957, "learning_rate": 1.901204196607606e-06, "loss": 0.7207, "step": 3660 }, { "epoch": 0.53, "grad_norm": 6.225827693939209, "learning_rate": 1.9011355864819056e-06, "loss": 0.7464, "step": 3661 }, { "epoch": 0.53, "grad_norm": 5.9193115234375, "learning_rate": 1.9010669537797173e-06, "loss": 0.7997, "step": 3662 }, { "epoch": 0.53, "grad_norm": 5.996858596801758, "learning_rate": 1.9009982985027607e-06, "loss": 0.6644, "step": 3663 }, { "epoch": 0.53, "grad_norm": 5.5743231773376465, "learning_rate": 1.9009296206527553e-06, "loss": 0.71, "step": 3664 }, { "epoch": 0.53, "grad_norm": 7.195181846618652, "learning_rate": 1.9008609202314225e-06, "loss": 0.7938, "step": 3665 }, { "epoch": 0.53, "grad_norm": 5.425824165344238, "learning_rate": 1.900792197240483e-06, "loss": 0.6743, "step": 3666 }, { "epoch": 0.53, "grad_norm": 5.4927263259887695, "learning_rate": 1.9007234516816586e-06, "loss": 0.7641, "step": 3667 }, { "epoch": 0.53, "grad_norm": 6.550882816314697, "learning_rate": 1.9006546835566717e-06, "loss": 0.7047, "step": 3668 }, { "epoch": 0.53, "grad_norm": 5.447956085205078, "learning_rate": 1.900585892867245e-06, "loss": 0.6941, "step": 3669 }, { "epoch": 0.53, "grad_norm": 6.4334540367126465, "learning_rate": 1.9005170796151023e-06, "loss": 0.8112, "step": 3670 }, { "epoch": 0.53, "grad_norm": 5.523262977600098, "learning_rate": 1.9004482438019668e-06, "loss": 0.7161, "step": 3671 }, { "epoch": 0.53, "grad_norm": 6.0735249519348145, "learning_rate": 1.9003793854295642e-06, "loss": 0.7552, "step": 3672 }, { "epoch": 0.53, "grad_norm": 5.491125583648682, "learning_rate": 1.9003105044996188e-06, "loss": 0.6972, "step": 3673 }, { "epoch": 0.53, "grad_norm": 6.1066765785217285, "learning_rate": 1.9002416010138567e-06, "loss": 0.7906, "step": 3674 }, { "epoch": 0.53, "grad_norm": 5.343003749847412, "learning_rate": 1.9001726749740038e-06, "loss": 0.7453, "step": 3675 }, { "epoch": 0.53, "grad_norm": 5.219620704650879, "learning_rate": 1.9001037263817874e-06, "loss": 0.7253, "step": 3676 }, { "epoch": 0.53, "grad_norm": 6.181654453277588, "learning_rate": 1.9000347552389346e-06, "loss": 0.7034, "step": 3677 }, { "epoch": 0.53, "grad_norm": 6.377232551574707, "learning_rate": 1.8999657615471733e-06, "loss": 0.895, "step": 3678 }, { "epoch": 0.53, "grad_norm": 6.3075714111328125, "learning_rate": 1.8998967453082323e-06, "loss": 0.6478, "step": 3679 }, { "epoch": 0.53, "grad_norm": 5.889284133911133, "learning_rate": 1.8998277065238405e-06, "loss": 0.8324, "step": 3680 }, { "epoch": 0.53, "grad_norm": 5.49477481842041, "learning_rate": 1.8997586451957273e-06, "loss": 0.7353, "step": 3681 }, { "epoch": 0.53, "grad_norm": 6.07155704498291, "learning_rate": 1.8996895613256233e-06, "loss": 0.8413, "step": 3682 }, { "epoch": 0.53, "grad_norm": 5.762080192565918, "learning_rate": 1.8996204549152592e-06, "loss": 0.7177, "step": 3683 }, { "epoch": 0.53, "grad_norm": 5.232661724090576, "learning_rate": 1.8995513259663665e-06, "loss": 0.6119, "step": 3684 }, { "epoch": 0.53, "grad_norm": 5.627023696899414, "learning_rate": 1.8994821744806771e-06, "loss": 0.6579, "step": 3685 }, { "epoch": 0.53, "grad_norm": 5.60296630859375, "learning_rate": 1.899413000459923e-06, "loss": 0.7584, "step": 3686 }, { "epoch": 0.53, "grad_norm": 5.7252044677734375, "learning_rate": 1.8993438039058375e-06, "loss": 0.6982, "step": 3687 }, { "epoch": 0.54, "grad_norm": 6.106780529022217, "learning_rate": 1.8992745848201546e-06, "loss": 0.7624, "step": 3688 }, { "epoch": 0.54, "grad_norm": 6.1299519538879395, "learning_rate": 1.899205343204608e-06, "loss": 0.7103, "step": 3689 }, { "epoch": 0.54, "grad_norm": 5.904615879058838, "learning_rate": 1.8991360790609322e-06, "loss": 0.6551, "step": 3690 }, { "epoch": 0.54, "grad_norm": 5.802899360656738, "learning_rate": 1.8990667923908633e-06, "loss": 0.6685, "step": 3691 }, { "epoch": 0.54, "grad_norm": 6.50769567489624, "learning_rate": 1.8989974831961369e-06, "loss": 0.8741, "step": 3692 }, { "epoch": 0.54, "grad_norm": 5.703862190246582, "learning_rate": 1.898928151478489e-06, "loss": 0.6809, "step": 3693 }, { "epoch": 0.54, "grad_norm": 6.432775974273682, "learning_rate": 1.898858797239657e-06, "loss": 0.7725, "step": 3694 }, { "epoch": 0.54, "grad_norm": 5.80025577545166, "learning_rate": 1.8987894204813782e-06, "loss": 0.7411, "step": 3695 }, { "epoch": 0.54, "grad_norm": 6.578402996063232, "learning_rate": 1.898720021205391e-06, "loss": 0.8125, "step": 3696 }, { "epoch": 0.54, "grad_norm": 5.755812644958496, "learning_rate": 1.8986505994134336e-06, "loss": 0.731, "step": 3697 }, { "epoch": 0.54, "grad_norm": 5.529111862182617, "learning_rate": 1.8985811551072459e-06, "loss": 0.6597, "step": 3698 }, { "epoch": 0.54, "grad_norm": 6.056437015533447, "learning_rate": 1.8985116882885673e-06, "loss": 0.7366, "step": 3699 }, { "epoch": 0.54, "grad_norm": 6.464015483856201, "learning_rate": 1.8984421989591385e-06, "loss": 0.7407, "step": 3700 }, { "epoch": 0.54, "grad_norm": 5.31296968460083, "learning_rate": 1.8983726871207e-06, "loss": 0.6296, "step": 3701 }, { "epoch": 0.54, "grad_norm": 6.3441386222839355, "learning_rate": 1.8983031527749933e-06, "loss": 0.7445, "step": 3702 }, { "epoch": 0.54, "grad_norm": 5.312503337860107, "learning_rate": 1.8982335959237609e-06, "loss": 0.7092, "step": 3703 }, { "epoch": 0.54, "grad_norm": 5.574075222015381, "learning_rate": 1.8981640165687454e-06, "loss": 0.6782, "step": 3704 }, { "epoch": 0.54, "grad_norm": 6.849024772644043, "learning_rate": 1.8980944147116897e-06, "loss": 0.7219, "step": 3705 }, { "epoch": 0.54, "grad_norm": 5.323756217956543, "learning_rate": 1.8980247903543378e-06, "loss": 0.7135, "step": 3706 }, { "epoch": 0.54, "grad_norm": 6.013281345367432, "learning_rate": 1.8979551434984335e-06, "loss": 0.7233, "step": 3707 }, { "epoch": 0.54, "grad_norm": 5.5538201332092285, "learning_rate": 1.8978854741457226e-06, "loss": 0.7998, "step": 3708 }, { "epoch": 0.54, "grad_norm": 5.586813449859619, "learning_rate": 1.89781578229795e-06, "loss": 0.7553, "step": 3709 }, { "epoch": 0.54, "grad_norm": 5.312394142150879, "learning_rate": 1.8977460679568616e-06, "loss": 0.7687, "step": 3710 }, { "epoch": 0.54, "grad_norm": 5.90488338470459, "learning_rate": 1.8976763311242041e-06, "loss": 0.8133, "step": 3711 }, { "epoch": 0.54, "grad_norm": 5.958365440368652, "learning_rate": 1.8976065718017246e-06, "loss": 0.7428, "step": 3712 }, { "epoch": 0.54, "grad_norm": 5.292104244232178, "learning_rate": 1.8975367899911713e-06, "loss": 0.6825, "step": 3713 }, { "epoch": 0.54, "grad_norm": 6.361595153808594, "learning_rate": 1.8974669856942918e-06, "loss": 0.6865, "step": 3714 }, { "epoch": 0.54, "grad_norm": 7.0997419357299805, "learning_rate": 1.8973971589128352e-06, "loss": 0.7645, "step": 3715 }, { "epoch": 0.54, "grad_norm": 5.781616687774658, "learning_rate": 1.897327309648551e-06, "loss": 0.7013, "step": 3716 }, { "epoch": 0.54, "grad_norm": 5.451426029205322, "learning_rate": 1.897257437903189e-06, "loss": 0.684, "step": 3717 }, { "epoch": 0.54, "grad_norm": 6.8027544021606445, "learning_rate": 1.8971875436784997e-06, "loss": 0.7951, "step": 3718 }, { "epoch": 0.54, "grad_norm": 6.201340675354004, "learning_rate": 1.8971176269762345e-06, "loss": 0.8045, "step": 3719 }, { "epoch": 0.54, "grad_norm": 5.875874042510986, "learning_rate": 1.8970476877981446e-06, "loss": 0.7641, "step": 3720 }, { "epoch": 0.54, "grad_norm": 5.30792760848999, "learning_rate": 1.8969777261459824e-06, "loss": 0.7511, "step": 3721 }, { "epoch": 0.54, "grad_norm": 5.943900108337402, "learning_rate": 1.896907742021501e-06, "loss": 0.762, "step": 3722 }, { "epoch": 0.54, "grad_norm": 5.273178577423096, "learning_rate": 1.8968377354264531e-06, "loss": 0.6856, "step": 3723 }, { "epoch": 0.54, "grad_norm": 5.661415100097656, "learning_rate": 1.8967677063625934e-06, "loss": 0.7014, "step": 3724 }, { "epoch": 0.54, "grad_norm": 6.285642623901367, "learning_rate": 1.8966976548316755e-06, "loss": 0.7758, "step": 3725 }, { "epoch": 0.54, "grad_norm": 5.295578479766846, "learning_rate": 1.896627580835455e-06, "loss": 0.7039, "step": 3726 }, { "epoch": 0.54, "grad_norm": 5.3867950439453125, "learning_rate": 1.896557484375687e-06, "loss": 0.7751, "step": 3727 }, { "epoch": 0.54, "grad_norm": 7.163607597351074, "learning_rate": 1.8964873654541284e-06, "loss": 0.7629, "step": 3728 }, { "epoch": 0.54, "grad_norm": 5.750369071960449, "learning_rate": 1.8964172240725356e-06, "loss": 0.727, "step": 3729 }, { "epoch": 0.54, "grad_norm": 5.5521039962768555, "learning_rate": 1.8963470602326655e-06, "loss": 0.6736, "step": 3730 }, { "epoch": 0.54, "grad_norm": 5.390524864196777, "learning_rate": 1.8962768739362761e-06, "loss": 0.7444, "step": 3731 }, { "epoch": 0.54, "grad_norm": 6.432868003845215, "learning_rate": 1.8962066651851263e-06, "loss": 0.7314, "step": 3732 }, { "epoch": 0.54, "grad_norm": 5.899489879608154, "learning_rate": 1.8961364339809745e-06, "loss": 0.8199, "step": 3733 }, { "epoch": 0.54, "grad_norm": 5.465852737426758, "learning_rate": 1.8960661803255803e-06, "loss": 0.7371, "step": 3734 }, { "epoch": 0.54, "grad_norm": 5.535419464111328, "learning_rate": 1.8959959042207039e-06, "loss": 0.6887, "step": 3735 }, { "epoch": 0.54, "grad_norm": 5.396625518798828, "learning_rate": 1.8959256056681059e-06, "loss": 0.7654, "step": 3736 }, { "epoch": 0.54, "grad_norm": 5.5470380783081055, "learning_rate": 1.895855284669548e-06, "loss": 0.7086, "step": 3737 }, { "epoch": 0.54, "grad_norm": 5.43189811706543, "learning_rate": 1.895784941226791e-06, "loss": 0.6911, "step": 3738 }, { "epoch": 0.54, "grad_norm": 6.0468854904174805, "learning_rate": 1.8957145753415982e-06, "loss": 0.8164, "step": 3739 }, { "epoch": 0.54, "grad_norm": 5.698955535888672, "learning_rate": 1.895644187015732e-06, "loss": 0.6783, "step": 3740 }, { "epoch": 0.54, "grad_norm": 5.578117847442627, "learning_rate": 1.895573776250956e-06, "loss": 0.7241, "step": 3741 }, { "epoch": 0.54, "grad_norm": 6.9787492752075195, "learning_rate": 1.8955033430490342e-06, "loss": 0.7456, "step": 3742 }, { "epoch": 0.54, "grad_norm": 6.538539886474609, "learning_rate": 1.8954328874117311e-06, "loss": 0.7393, "step": 3743 }, { "epoch": 0.54, "grad_norm": 6.5313544273376465, "learning_rate": 1.8953624093408118e-06, "loss": 0.6124, "step": 3744 }, { "epoch": 0.54, "grad_norm": 6.033573150634766, "learning_rate": 1.8952919088380423e-06, "loss": 0.7797, "step": 3745 }, { "epoch": 0.54, "grad_norm": 6.05507230758667, "learning_rate": 1.8952213859051888e-06, "loss": 0.6626, "step": 3746 }, { "epoch": 0.54, "grad_norm": 5.147585868835449, "learning_rate": 1.895150840544018e-06, "loss": 0.6818, "step": 3747 }, { "epoch": 0.54, "grad_norm": 5.913471221923828, "learning_rate": 1.8950802727562972e-06, "loss": 0.8182, "step": 3748 }, { "epoch": 0.54, "grad_norm": 6.707426071166992, "learning_rate": 1.8950096825437948e-06, "loss": 0.7983, "step": 3749 }, { "epoch": 0.54, "grad_norm": 5.341215133666992, "learning_rate": 1.8949390699082787e-06, "loss": 0.701, "step": 3750 }, { "epoch": 0.54, "grad_norm": 5.078060626983643, "learning_rate": 1.8948684348515185e-06, "loss": 0.6524, "step": 3751 }, { "epoch": 0.54, "grad_norm": 6.459370136260986, "learning_rate": 1.8947977773752836e-06, "loss": 0.7146, "step": 3752 }, { "epoch": 0.54, "grad_norm": 6.53359842300415, "learning_rate": 1.8947270974813444e-06, "loss": 0.7773, "step": 3753 }, { "epoch": 0.54, "grad_norm": 5.972790241241455, "learning_rate": 1.8946563951714716e-06, "loss": 0.8257, "step": 3754 }, { "epoch": 0.54, "grad_norm": 5.089667797088623, "learning_rate": 1.8945856704474364e-06, "loss": 0.6748, "step": 3755 }, { "epoch": 0.54, "grad_norm": 5.223992824554443, "learning_rate": 1.8945149233110107e-06, "loss": 0.7435, "step": 3756 }, { "epoch": 0.55, "grad_norm": 6.6291117668151855, "learning_rate": 1.8944441537639672e-06, "loss": 0.6883, "step": 3757 }, { "epoch": 0.55, "grad_norm": 6.004089832305908, "learning_rate": 1.8943733618080786e-06, "loss": 0.6442, "step": 3758 }, { "epoch": 0.55, "grad_norm": 5.707785606384277, "learning_rate": 1.8943025474451186e-06, "loss": 0.6663, "step": 3759 }, { "epoch": 0.55, "grad_norm": 5.853376388549805, "learning_rate": 1.8942317106768615e-06, "loss": 0.727, "step": 3760 }, { "epoch": 0.55, "grad_norm": 5.504909038543701, "learning_rate": 1.8941608515050817e-06, "loss": 0.7366, "step": 3761 }, { "epoch": 0.55, "grad_norm": 5.758227825164795, "learning_rate": 1.894089969931555e-06, "loss": 0.7625, "step": 3762 }, { "epoch": 0.55, "grad_norm": 5.689115524291992, "learning_rate": 1.8940190659580563e-06, "loss": 0.7492, "step": 3763 }, { "epoch": 0.55, "grad_norm": 5.322715759277344, "learning_rate": 1.8939481395863628e-06, "loss": 0.6987, "step": 3764 }, { "epoch": 0.55, "grad_norm": 5.838770389556885, "learning_rate": 1.8938771908182512e-06, "loss": 0.7381, "step": 3765 }, { "epoch": 0.55, "grad_norm": 5.788283348083496, "learning_rate": 1.893806219655499e-06, "loss": 0.745, "step": 3766 }, { "epoch": 0.55, "grad_norm": 5.514571189880371, "learning_rate": 1.8937352260998845e-06, "loss": 0.7444, "step": 3767 }, { "epoch": 0.55, "grad_norm": 6.070830821990967, "learning_rate": 1.8936642101531856e-06, "loss": 0.8255, "step": 3768 }, { "epoch": 0.55, "grad_norm": 5.40324068069458, "learning_rate": 1.8935931718171823e-06, "loss": 0.6693, "step": 3769 }, { "epoch": 0.55, "grad_norm": 5.865451812744141, "learning_rate": 1.8935221110936537e-06, "loss": 0.7979, "step": 3770 }, { "epoch": 0.55, "grad_norm": 6.09391450881958, "learning_rate": 1.8934510279843807e-06, "loss": 0.7323, "step": 3771 }, { "epoch": 0.55, "grad_norm": 6.063720226287842, "learning_rate": 1.8933799224911438e-06, "loss": 0.6954, "step": 3772 }, { "epoch": 0.55, "grad_norm": 5.677762508392334, "learning_rate": 1.8933087946157244e-06, "loss": 0.8014, "step": 3773 }, { "epoch": 0.55, "grad_norm": 6.219740390777588, "learning_rate": 1.8932376443599048e-06, "loss": 0.7012, "step": 3774 }, { "epoch": 0.55, "grad_norm": 5.857813358306885, "learning_rate": 1.8931664717254674e-06, "loss": 0.7419, "step": 3775 }, { "epoch": 0.55, "grad_norm": 5.891201019287109, "learning_rate": 1.8930952767141953e-06, "loss": 0.6578, "step": 3776 }, { "epoch": 0.55, "grad_norm": 5.1006855964660645, "learning_rate": 1.893024059327872e-06, "loss": 0.67, "step": 3777 }, { "epoch": 0.55, "grad_norm": 6.495522499084473, "learning_rate": 1.8929528195682818e-06, "loss": 0.7055, "step": 3778 }, { "epoch": 0.55, "grad_norm": 5.456862449645996, "learning_rate": 1.8928815574372098e-06, "loss": 0.6807, "step": 3779 }, { "epoch": 0.55, "grad_norm": 7.004818439483643, "learning_rate": 1.8928102729364412e-06, "loss": 0.6979, "step": 3780 }, { "epoch": 0.55, "grad_norm": 6.28970193862915, "learning_rate": 1.8927389660677618e-06, "loss": 0.7827, "step": 3781 }, { "epoch": 0.55, "grad_norm": 5.630998611450195, "learning_rate": 1.8926676368329582e-06, "loss": 0.7175, "step": 3782 }, { "epoch": 0.55, "grad_norm": 5.599793910980225, "learning_rate": 1.8925962852338176e-06, "loss": 0.6877, "step": 3783 }, { "epoch": 0.55, "grad_norm": 5.197524070739746, "learning_rate": 1.8925249112721268e-06, "loss": 0.7202, "step": 3784 }, { "epoch": 0.55, "grad_norm": 5.727447986602783, "learning_rate": 1.8924535149496752e-06, "loss": 0.7929, "step": 3785 }, { "epoch": 0.55, "grad_norm": 5.6240553855896, "learning_rate": 1.8923820962682506e-06, "loss": 0.7197, "step": 3786 }, { "epoch": 0.55, "grad_norm": 5.08529806137085, "learning_rate": 1.8923106552296424e-06, "loss": 0.6472, "step": 3787 }, { "epoch": 0.55, "grad_norm": 5.771764755249023, "learning_rate": 1.8922391918356406e-06, "loss": 0.7125, "step": 3788 }, { "epoch": 0.55, "grad_norm": 5.553943157196045, "learning_rate": 1.892167706088036e-06, "loss": 0.7118, "step": 3789 }, { "epoch": 0.55, "grad_norm": 6.349117279052734, "learning_rate": 1.8920961979886187e-06, "loss": 0.7678, "step": 3790 }, { "epoch": 0.55, "grad_norm": 6.369024276733398, "learning_rate": 1.8920246675391808e-06, "loss": 0.7945, "step": 3791 }, { "epoch": 0.55, "grad_norm": 6.494802951812744, "learning_rate": 1.8919531147415143e-06, "loss": 0.8271, "step": 3792 }, { "epoch": 0.55, "grad_norm": 5.43560266494751, "learning_rate": 1.8918815395974118e-06, "loss": 0.724, "step": 3793 }, { "epoch": 0.55, "grad_norm": 5.395975589752197, "learning_rate": 1.8918099421086664e-06, "loss": 0.6928, "step": 3794 }, { "epoch": 0.55, "grad_norm": 6.198655128479004, "learning_rate": 1.8917383222770718e-06, "loss": 0.7655, "step": 3795 }, { "epoch": 0.55, "grad_norm": 6.066186428070068, "learning_rate": 1.8916666801044227e-06, "loss": 0.7534, "step": 3796 }, { "epoch": 0.55, "grad_norm": 5.657782554626465, "learning_rate": 1.8915950155925136e-06, "loss": 0.7302, "step": 3797 }, { "epoch": 0.55, "grad_norm": 6.938567638397217, "learning_rate": 1.89152332874314e-06, "loss": 0.7594, "step": 3798 }, { "epoch": 0.55, "grad_norm": 5.996820449829102, "learning_rate": 1.8914516195580981e-06, "loss": 0.8044, "step": 3799 }, { "epoch": 0.55, "grad_norm": 6.1183085441589355, "learning_rate": 1.8913798880391845e-06, "loss": 0.7743, "step": 3800 }, { "epoch": 0.55, "grad_norm": 6.097893238067627, "learning_rate": 1.891308134188196e-06, "loss": 0.681, "step": 3801 }, { "epoch": 0.55, "grad_norm": 5.56819486618042, "learning_rate": 1.8912363580069304e-06, "loss": 0.7541, "step": 3802 }, { "epoch": 0.55, "grad_norm": 5.983480453491211, "learning_rate": 1.8911645594971858e-06, "loss": 0.7634, "step": 3803 }, { "epoch": 0.55, "grad_norm": 6.216529846191406, "learning_rate": 1.8910927386607614e-06, "loss": 0.7684, "step": 3804 }, { "epoch": 0.55, "grad_norm": 5.400991916656494, "learning_rate": 1.891020895499456e-06, "loss": 0.7662, "step": 3805 }, { "epoch": 0.55, "grad_norm": 6.793312072753906, "learning_rate": 1.8909490300150701e-06, "loss": 0.8025, "step": 3806 }, { "epoch": 0.55, "grad_norm": 5.993477821350098, "learning_rate": 1.8908771422094038e-06, "loss": 0.7636, "step": 3807 }, { "epoch": 0.55, "grad_norm": 5.769576072692871, "learning_rate": 1.8908052320842583e-06, "loss": 0.7784, "step": 3808 }, { "epoch": 0.55, "grad_norm": 5.9482316970825195, "learning_rate": 1.8907332996414351e-06, "loss": 0.672, "step": 3809 }, { "epoch": 0.55, "grad_norm": 5.270697116851807, "learning_rate": 1.8906613448827361e-06, "loss": 0.7285, "step": 3810 }, { "epoch": 0.55, "grad_norm": 6.128028392791748, "learning_rate": 1.8905893678099647e-06, "loss": 0.8156, "step": 3811 }, { "epoch": 0.55, "grad_norm": 5.826267242431641, "learning_rate": 1.8905173684249234e-06, "loss": 0.7425, "step": 3812 }, { "epoch": 0.55, "grad_norm": 7.025418758392334, "learning_rate": 1.8904453467294166e-06, "loss": 0.7983, "step": 3813 }, { "epoch": 0.55, "grad_norm": 5.663534641265869, "learning_rate": 1.8903733027252483e-06, "loss": 0.6717, "step": 3814 }, { "epoch": 0.55, "grad_norm": 5.401002883911133, "learning_rate": 1.8903012364142236e-06, "loss": 0.6671, "step": 3815 }, { "epoch": 0.55, "grad_norm": 6.100997447967529, "learning_rate": 1.8902291477981482e-06, "loss": 0.8063, "step": 3816 }, { "epoch": 0.55, "grad_norm": 5.753173828125, "learning_rate": 1.8901570368788277e-06, "loss": 0.7046, "step": 3817 }, { "epoch": 0.55, "grad_norm": 5.442505836486816, "learning_rate": 1.8900849036580695e-06, "loss": 0.6607, "step": 3818 }, { "epoch": 0.55, "grad_norm": 6.130781173706055, "learning_rate": 1.8900127481376797e-06, "loss": 0.7741, "step": 3819 }, { "epoch": 0.55, "grad_norm": 5.655824661254883, "learning_rate": 1.8899405703194668e-06, "loss": 0.7145, "step": 3820 }, { "epoch": 0.55, "grad_norm": 5.9046101570129395, "learning_rate": 1.8898683702052392e-06, "loss": 0.8211, "step": 3821 }, { "epoch": 0.55, "grad_norm": 6.232338905334473, "learning_rate": 1.8897961477968052e-06, "loss": 0.7377, "step": 3822 }, { "epoch": 0.55, "grad_norm": 6.036750793457031, "learning_rate": 1.8897239030959744e-06, "loss": 0.7276, "step": 3823 }, { "epoch": 0.55, "grad_norm": 5.8812761306762695, "learning_rate": 1.889651636104557e-06, "loss": 0.7664, "step": 3824 }, { "epoch": 0.55, "grad_norm": 6.090035915374756, "learning_rate": 1.8895793468243633e-06, "loss": 0.8217, "step": 3825 }, { "epoch": 0.56, "grad_norm": 5.529289245605469, "learning_rate": 1.8895070352572046e-06, "loss": 0.6484, "step": 3826 }, { "epoch": 0.56, "grad_norm": 6.284845352172852, "learning_rate": 1.8894347014048924e-06, "loss": 0.7035, "step": 3827 }, { "epoch": 0.56, "grad_norm": 5.920029640197754, "learning_rate": 1.889362345269239e-06, "loss": 0.7369, "step": 3828 }, { "epoch": 0.56, "grad_norm": 6.295899868011475, "learning_rate": 1.8892899668520568e-06, "loss": 0.7292, "step": 3829 }, { "epoch": 0.56, "grad_norm": 6.1841959953308105, "learning_rate": 1.8892175661551597e-06, "loss": 0.6718, "step": 3830 }, { "epoch": 0.56, "grad_norm": 5.3219709396362305, "learning_rate": 1.8891451431803613e-06, "loss": 0.7129, "step": 3831 }, { "epoch": 0.56, "grad_norm": 6.456075191497803, "learning_rate": 1.889072697929476e-06, "loss": 0.794, "step": 3832 }, { "epoch": 0.56, "grad_norm": 5.4191975593566895, "learning_rate": 1.889000230404319e-06, "loss": 0.7286, "step": 3833 }, { "epoch": 0.56, "grad_norm": 5.9183349609375, "learning_rate": 1.8889277406067052e-06, "loss": 0.7576, "step": 3834 }, { "epoch": 0.56, "grad_norm": 5.529291152954102, "learning_rate": 1.8888552285384517e-06, "loss": 0.6935, "step": 3835 }, { "epoch": 0.56, "grad_norm": 5.749897003173828, "learning_rate": 1.8887826942013744e-06, "loss": 0.7395, "step": 3836 }, { "epoch": 0.56, "grad_norm": 5.6175031661987305, "learning_rate": 1.8887101375972908e-06, "loss": 0.7354, "step": 3837 }, { "epoch": 0.56, "grad_norm": 6.0267133712768555, "learning_rate": 1.888637558728019e-06, "loss": 0.7658, "step": 3838 }, { "epoch": 0.56, "grad_norm": 4.933545112609863, "learning_rate": 1.8885649575953768e-06, "loss": 0.6395, "step": 3839 }, { "epoch": 0.56, "grad_norm": 5.4764180183410645, "learning_rate": 1.8884923342011836e-06, "loss": 0.6493, "step": 3840 }, { "epoch": 0.56, "grad_norm": 5.767940044403076, "learning_rate": 1.8884196885472585e-06, "loss": 0.7488, "step": 3841 }, { "epoch": 0.56, "grad_norm": 5.7634735107421875, "learning_rate": 1.8883470206354215e-06, "loss": 0.772, "step": 3842 }, { "epoch": 0.56, "grad_norm": 5.476386070251465, "learning_rate": 1.8882743304674932e-06, "loss": 0.7235, "step": 3843 }, { "epoch": 0.56, "grad_norm": 6.794938087463379, "learning_rate": 1.8882016180452952e-06, "loss": 0.6894, "step": 3844 }, { "epoch": 0.56, "grad_norm": 5.746413707733154, "learning_rate": 1.8881288833706487e-06, "loss": 0.6724, "step": 3845 }, { "epoch": 0.56, "grad_norm": 5.370162010192871, "learning_rate": 1.8880561264453761e-06, "loss": 0.6726, "step": 3846 }, { "epoch": 0.56, "grad_norm": 6.325157642364502, "learning_rate": 1.8879833472713002e-06, "loss": 0.7438, "step": 3847 }, { "epoch": 0.56, "grad_norm": 6.589658260345459, "learning_rate": 1.8879105458502442e-06, "loss": 0.7766, "step": 3848 }, { "epoch": 0.56, "grad_norm": 5.952274322509766, "learning_rate": 1.8878377221840325e-06, "loss": 0.7229, "step": 3849 }, { "epoch": 0.56, "grad_norm": 6.053658485412598, "learning_rate": 1.887764876274489e-06, "loss": 0.7036, "step": 3850 }, { "epoch": 0.56, "grad_norm": 5.425085544586182, "learning_rate": 1.8876920081234391e-06, "loss": 0.742, "step": 3851 }, { "epoch": 0.56, "grad_norm": 5.893641471862793, "learning_rate": 1.8876191177327084e-06, "loss": 0.7282, "step": 3852 }, { "epoch": 0.56, "grad_norm": 6.593148231506348, "learning_rate": 1.8875462051041225e-06, "loss": 0.8282, "step": 3853 }, { "epoch": 0.56, "grad_norm": 6.616939544677734, "learning_rate": 1.8874732702395089e-06, "loss": 0.7418, "step": 3854 }, { "epoch": 0.56, "grad_norm": 5.4549994468688965, "learning_rate": 1.8874003131406944e-06, "loss": 0.7279, "step": 3855 }, { "epoch": 0.56, "grad_norm": 6.068183898925781, "learning_rate": 1.8873273338095071e-06, "loss": 0.7104, "step": 3856 }, { "epoch": 0.56, "grad_norm": 5.872913360595703, "learning_rate": 1.887254332247775e-06, "loss": 0.8167, "step": 3857 }, { "epoch": 0.56, "grad_norm": 6.124499320983887, "learning_rate": 1.8871813084573271e-06, "loss": 0.8879, "step": 3858 }, { "epoch": 0.56, "grad_norm": 5.815803050994873, "learning_rate": 1.8871082624399932e-06, "loss": 0.7643, "step": 3859 }, { "epoch": 0.56, "grad_norm": 5.921733856201172, "learning_rate": 1.8870351941976033e-06, "loss": 0.8131, "step": 3860 }, { "epoch": 0.56, "grad_norm": 6.027994632720947, "learning_rate": 1.8869621037319878e-06, "loss": 0.7434, "step": 3861 }, { "epoch": 0.56, "grad_norm": 6.088806629180908, "learning_rate": 1.886888991044978e-06, "loss": 0.7612, "step": 3862 }, { "epoch": 0.56, "grad_norm": 6.103236675262451, "learning_rate": 1.8868158561384054e-06, "loss": 0.7626, "step": 3863 }, { "epoch": 0.56, "grad_norm": 5.584204196929932, "learning_rate": 1.8867426990141025e-06, "loss": 0.7214, "step": 3864 }, { "epoch": 0.56, "grad_norm": 5.630407333374023, "learning_rate": 1.8866695196739021e-06, "loss": 0.7182, "step": 3865 }, { "epoch": 0.56, "grad_norm": 6.615478515625, "learning_rate": 1.8865963181196377e-06, "loss": 0.6616, "step": 3866 }, { "epoch": 0.56, "grad_norm": 5.599752902984619, "learning_rate": 1.8865230943531428e-06, "loss": 0.7838, "step": 3867 }, { "epoch": 0.56, "grad_norm": 5.663799285888672, "learning_rate": 1.8864498483762527e-06, "loss": 0.6649, "step": 3868 }, { "epoch": 0.56, "grad_norm": 6.235529899597168, "learning_rate": 1.8863765801908018e-06, "loss": 0.8269, "step": 3869 }, { "epoch": 0.56, "grad_norm": 5.95792818069458, "learning_rate": 1.8863032897986257e-06, "loss": 0.7392, "step": 3870 }, { "epoch": 0.56, "grad_norm": 5.671112537384033, "learning_rate": 1.886229977201561e-06, "loss": 0.7627, "step": 3871 }, { "epoch": 0.56, "grad_norm": 5.809866905212402, "learning_rate": 1.8861566424014442e-06, "loss": 0.6857, "step": 3872 }, { "epoch": 0.56, "grad_norm": 5.993608474731445, "learning_rate": 1.8860832854001126e-06, "loss": 0.7197, "step": 3873 }, { "epoch": 0.56, "grad_norm": 5.630556583404541, "learning_rate": 1.8860099061994038e-06, "loss": 0.7066, "step": 3874 }, { "epoch": 0.56, "grad_norm": 5.763593673706055, "learning_rate": 1.8859365048011565e-06, "loss": 0.7332, "step": 3875 }, { "epoch": 0.56, "grad_norm": 6.056605815887451, "learning_rate": 1.8858630812072093e-06, "loss": 0.7357, "step": 3876 }, { "epoch": 0.56, "grad_norm": 5.702060699462891, "learning_rate": 1.8857896354194024e-06, "loss": 0.7137, "step": 3877 }, { "epoch": 0.56, "grad_norm": 5.299181938171387, "learning_rate": 1.8857161674395755e-06, "loss": 0.6692, "step": 3878 }, { "epoch": 0.56, "grad_norm": 5.56968879699707, "learning_rate": 1.8856426772695686e-06, "loss": 0.6779, "step": 3879 }, { "epoch": 0.56, "grad_norm": 5.412726402282715, "learning_rate": 1.885569164911224e-06, "loss": 0.7198, "step": 3880 }, { "epoch": 0.56, "grad_norm": 6.791935920715332, "learning_rate": 1.8854956303663826e-06, "loss": 0.702, "step": 3881 }, { "epoch": 0.56, "grad_norm": 5.737873077392578, "learning_rate": 1.885422073636887e-06, "loss": 0.6809, "step": 3882 }, { "epoch": 0.56, "grad_norm": 5.764128684997559, "learning_rate": 1.8853484947245799e-06, "loss": 0.7152, "step": 3883 }, { "epoch": 0.56, "grad_norm": 6.212802410125732, "learning_rate": 1.8852748936313052e-06, "loss": 0.7713, "step": 3884 }, { "epoch": 0.56, "grad_norm": 6.23611307144165, "learning_rate": 1.8852012703589062e-06, "loss": 0.7342, "step": 3885 }, { "epoch": 0.56, "grad_norm": 5.882552146911621, "learning_rate": 1.8851276249092276e-06, "loss": 0.729, "step": 3886 }, { "epoch": 0.56, "grad_norm": 6.56356143951416, "learning_rate": 1.8850539572841147e-06, "loss": 0.7729, "step": 3887 }, { "epoch": 0.56, "grad_norm": 6.161940574645996, "learning_rate": 1.8849802674854128e-06, "loss": 0.6513, "step": 3888 }, { "epoch": 0.56, "grad_norm": 5.370474338531494, "learning_rate": 1.8849065555149682e-06, "loss": 0.6943, "step": 3889 }, { "epoch": 0.56, "grad_norm": 5.455685138702393, "learning_rate": 1.884832821374628e-06, "loss": 0.7538, "step": 3890 }, { "epoch": 0.56, "grad_norm": 6.292651176452637, "learning_rate": 1.8847590650662388e-06, "loss": 0.7409, "step": 3891 }, { "epoch": 0.56, "grad_norm": 6.713366508483887, "learning_rate": 1.8846852865916486e-06, "loss": 0.7636, "step": 3892 }, { "epoch": 0.56, "grad_norm": 5.624698638916016, "learning_rate": 1.8846114859527064e-06, "loss": 0.7272, "step": 3893 }, { "epoch": 0.57, "grad_norm": 5.769314289093018, "learning_rate": 1.8845376631512609e-06, "loss": 0.6957, "step": 3894 }, { "epoch": 0.57, "grad_norm": 5.615253925323486, "learning_rate": 1.8844638181891615e-06, "loss": 0.7112, "step": 3895 }, { "epoch": 0.57, "grad_norm": 5.461129665374756, "learning_rate": 1.884389951068258e-06, "loss": 0.7334, "step": 3896 }, { "epoch": 0.57, "grad_norm": 5.148087978363037, "learning_rate": 1.8843160617904012e-06, "loss": 0.719, "step": 3897 }, { "epoch": 0.57, "grad_norm": 6.307846546173096, "learning_rate": 1.8842421503574423e-06, "loss": 0.7226, "step": 3898 }, { "epoch": 0.57, "grad_norm": 5.6826581954956055, "learning_rate": 1.8841682167712332e-06, "loss": 0.7606, "step": 3899 }, { "epoch": 0.57, "grad_norm": 5.876890659332275, "learning_rate": 1.884094261033626e-06, "loss": 0.7678, "step": 3900 }, { "epoch": 0.57, "grad_norm": 5.486367225646973, "learning_rate": 1.8840202831464737e-06, "loss": 0.6896, "step": 3901 }, { "epoch": 0.57, "grad_norm": 5.949161529541016, "learning_rate": 1.8839462831116296e-06, "loss": 0.6734, "step": 3902 }, { "epoch": 0.57, "grad_norm": 5.592111587524414, "learning_rate": 1.8838722609309477e-06, "loss": 0.6848, "step": 3903 }, { "epoch": 0.57, "grad_norm": 5.265902996063232, "learning_rate": 1.8837982166062822e-06, "loss": 0.6628, "step": 3904 }, { "epoch": 0.57, "grad_norm": 5.14247989654541, "learning_rate": 1.8837241501394883e-06, "loss": 0.7331, "step": 3905 }, { "epoch": 0.57, "grad_norm": 5.4131293296813965, "learning_rate": 1.8836500615324221e-06, "loss": 0.6838, "step": 3906 }, { "epoch": 0.57, "grad_norm": 5.4800825119018555, "learning_rate": 1.8835759507869392e-06, "loss": 0.7641, "step": 3907 }, { "epoch": 0.57, "grad_norm": 5.831654071807861, "learning_rate": 1.8835018179048965e-06, "loss": 0.6946, "step": 3908 }, { "epoch": 0.57, "grad_norm": 6.416829586029053, "learning_rate": 1.8834276628881511e-06, "loss": 0.8013, "step": 3909 }, { "epoch": 0.57, "grad_norm": 6.306974411010742, "learning_rate": 1.8833534857385612e-06, "loss": 0.7365, "step": 3910 }, { "epoch": 0.57, "grad_norm": 5.7118988037109375, "learning_rate": 1.883279286457985e-06, "loss": 0.7951, "step": 3911 }, { "epoch": 0.57, "grad_norm": 6.7360520362854, "learning_rate": 1.8832050650482818e-06, "loss": 0.7906, "step": 3912 }, { "epoch": 0.57, "grad_norm": 5.264839172363281, "learning_rate": 1.8831308215113105e-06, "loss": 0.6795, "step": 3913 }, { "epoch": 0.57, "grad_norm": 5.59733772277832, "learning_rate": 1.8830565558489312e-06, "loss": 0.7563, "step": 3914 }, { "epoch": 0.57, "grad_norm": 6.343383312225342, "learning_rate": 1.882982268063005e-06, "loss": 0.691, "step": 3915 }, { "epoch": 0.57, "grad_norm": 6.290528297424316, "learning_rate": 1.8829079581553925e-06, "loss": 0.713, "step": 3916 }, { "epoch": 0.57, "grad_norm": 5.844059944152832, "learning_rate": 1.8828336261279558e-06, "loss": 0.7927, "step": 3917 }, { "epoch": 0.57, "grad_norm": 5.7295918464660645, "learning_rate": 1.8827592719825573e-06, "loss": 0.8407, "step": 3918 }, { "epoch": 0.57, "grad_norm": 6.649172306060791, "learning_rate": 1.8826848957210592e-06, "loss": 0.8696, "step": 3919 }, { "epoch": 0.57, "grad_norm": 6.9072699546813965, "learning_rate": 1.8826104973453254e-06, "loss": 0.7125, "step": 3920 }, { "epoch": 0.57, "grad_norm": 5.666875839233398, "learning_rate": 1.8825360768572196e-06, "loss": 0.7291, "step": 3921 }, { "epoch": 0.57, "grad_norm": 5.613813877105713, "learning_rate": 1.8824616342586065e-06, "loss": 0.674, "step": 3922 }, { "epoch": 0.57, "grad_norm": 6.0310773849487305, "learning_rate": 1.8823871695513512e-06, "loss": 0.7467, "step": 3923 }, { "epoch": 0.57, "grad_norm": 6.303100109100342, "learning_rate": 1.882312682737319e-06, "loss": 0.6772, "step": 3924 }, { "epoch": 0.57, "grad_norm": 5.809709072113037, "learning_rate": 1.8822381738183757e-06, "loss": 0.7486, "step": 3925 }, { "epoch": 0.57, "grad_norm": 5.298140525817871, "learning_rate": 1.8821636427963886e-06, "loss": 0.7886, "step": 3926 }, { "epoch": 0.57, "grad_norm": 5.904175758361816, "learning_rate": 1.8820890896732248e-06, "loss": 0.7612, "step": 3927 }, { "epoch": 0.57, "grad_norm": 5.699484348297119, "learning_rate": 1.8820145144507525e-06, "loss": 0.7916, "step": 3928 }, { "epoch": 0.57, "grad_norm": 6.056270599365234, "learning_rate": 1.8819399171308393e-06, "loss": 0.7625, "step": 3929 }, { "epoch": 0.57, "grad_norm": 5.708676815032959, "learning_rate": 1.8818652977153544e-06, "loss": 0.6406, "step": 3930 }, { "epoch": 0.57, "grad_norm": 5.3197479248046875, "learning_rate": 1.8817906562061675e-06, "loss": 0.6866, "step": 3931 }, { "epoch": 0.57, "grad_norm": 6.069167613983154, "learning_rate": 1.8817159926051486e-06, "loss": 0.6967, "step": 3932 }, { "epoch": 0.57, "grad_norm": 5.581554889678955, "learning_rate": 1.8816413069141682e-06, "loss": 0.7169, "step": 3933 }, { "epoch": 0.57, "grad_norm": 5.532426834106445, "learning_rate": 1.881566599135097e-06, "loss": 0.6681, "step": 3934 }, { "epoch": 0.57, "grad_norm": 5.900478363037109, "learning_rate": 1.8814918692698073e-06, "loss": 0.733, "step": 3935 }, { "epoch": 0.57, "grad_norm": 5.9575419425964355, "learning_rate": 1.881417117320171e-06, "loss": 0.6387, "step": 3936 }, { "epoch": 0.57, "grad_norm": 6.107063293457031, "learning_rate": 1.8813423432880611e-06, "loss": 0.7422, "step": 3937 }, { "epoch": 0.57, "grad_norm": 6.201213359832764, "learning_rate": 1.8812675471753506e-06, "loss": 0.7622, "step": 3938 }, { "epoch": 0.57, "grad_norm": 5.216308116912842, "learning_rate": 1.881192728983914e-06, "loss": 0.721, "step": 3939 }, { "epoch": 0.57, "grad_norm": 6.265153884887695, "learning_rate": 1.881117888715625e-06, "loss": 0.7674, "step": 3940 }, { "epoch": 0.57, "grad_norm": 7.712737083435059, "learning_rate": 1.8810430263723592e-06, "loss": 0.6773, "step": 3941 }, { "epoch": 0.57, "grad_norm": 6.516477108001709, "learning_rate": 1.8809681419559915e-06, "loss": 0.7752, "step": 3942 }, { "epoch": 0.57, "grad_norm": 6.090323448181152, "learning_rate": 1.8808932354683988e-06, "loss": 0.7838, "step": 3943 }, { "epoch": 0.57, "grad_norm": 5.713118553161621, "learning_rate": 1.8808183069114574e-06, "loss": 0.7519, "step": 3944 }, { "epoch": 0.57, "grad_norm": 5.559053897857666, "learning_rate": 1.8807433562870444e-06, "loss": 0.6624, "step": 3945 }, { "epoch": 0.57, "grad_norm": 5.934262752532959, "learning_rate": 1.8806683835970376e-06, "loss": 0.7191, "step": 3946 }, { "epoch": 0.57, "grad_norm": 5.734877586364746, "learning_rate": 1.8805933888433155e-06, "loss": 0.7112, "step": 3947 }, { "epoch": 0.57, "grad_norm": 5.906218528747559, "learning_rate": 1.8805183720277568e-06, "loss": 0.6902, "step": 3948 }, { "epoch": 0.57, "grad_norm": 5.791180610656738, "learning_rate": 1.8804433331522408e-06, "loss": 0.728, "step": 3949 }, { "epoch": 0.57, "grad_norm": 5.338413238525391, "learning_rate": 1.880368272218648e-06, "loss": 0.7268, "step": 3950 }, { "epoch": 0.57, "grad_norm": 5.789515495300293, "learning_rate": 1.8802931892288585e-06, "loss": 0.7751, "step": 3951 }, { "epoch": 0.57, "grad_norm": 5.501718044281006, "learning_rate": 1.8802180841847535e-06, "loss": 0.7295, "step": 3952 }, { "epoch": 0.57, "grad_norm": 5.605016708374023, "learning_rate": 1.8801429570882145e-06, "loss": 0.7046, "step": 3953 }, { "epoch": 0.57, "grad_norm": 5.394322872161865, "learning_rate": 1.8800678079411237e-06, "loss": 0.6138, "step": 3954 }, { "epoch": 0.57, "grad_norm": 5.729414939880371, "learning_rate": 1.879992636745364e-06, "loss": 0.7095, "step": 3955 }, { "epoch": 0.57, "grad_norm": 5.559628009796143, "learning_rate": 1.8799174435028187e-06, "loss": 0.697, "step": 3956 }, { "epoch": 0.57, "grad_norm": 6.5360846519470215, "learning_rate": 1.8798422282153713e-06, "loss": 0.6761, "step": 3957 }, { "epoch": 0.57, "grad_norm": 5.001737117767334, "learning_rate": 1.879766990884907e-06, "loss": 0.6817, "step": 3958 }, { "epoch": 0.57, "grad_norm": 6.670253753662109, "learning_rate": 1.8796917315133097e-06, "loss": 0.7651, "step": 3959 }, { "epoch": 0.57, "grad_norm": 5.944447040557861, "learning_rate": 1.879616450102466e-06, "loss": 0.7781, "step": 3960 }, { "epoch": 0.57, "grad_norm": 7.246452331542969, "learning_rate": 1.8795411466542609e-06, "loss": 0.7701, "step": 3961 }, { "epoch": 0.57, "grad_norm": 5.765483856201172, "learning_rate": 1.8794658211705814e-06, "loss": 0.7197, "step": 3962 }, { "epoch": 0.58, "grad_norm": 5.7339982986450195, "learning_rate": 1.879390473653315e-06, "loss": 0.7341, "step": 3963 }, { "epoch": 0.58, "grad_norm": 5.451638698577881, "learning_rate": 1.879315104104349e-06, "loss": 0.6254, "step": 3964 }, { "epoch": 0.58, "grad_norm": 6.165822982788086, "learning_rate": 1.8792397125255719e-06, "loss": 0.6733, "step": 3965 }, { "epoch": 0.58, "grad_norm": 6.15878963470459, "learning_rate": 1.8791642989188724e-06, "loss": 0.7225, "step": 3966 }, { "epoch": 0.58, "grad_norm": 6.802229404449463, "learning_rate": 1.8790888632861397e-06, "loss": 0.7043, "step": 3967 }, { "epoch": 0.58, "grad_norm": 5.4579901695251465, "learning_rate": 1.8790134056292641e-06, "loss": 0.6885, "step": 3968 }, { "epoch": 0.58, "grad_norm": 5.65329122543335, "learning_rate": 1.8789379259501357e-06, "loss": 0.7693, "step": 3969 }, { "epoch": 0.58, "grad_norm": 5.461370468139648, "learning_rate": 1.878862424250646e-06, "loss": 0.756, "step": 3970 }, { "epoch": 0.58, "grad_norm": 5.858790874481201, "learning_rate": 1.878786900532686e-06, "loss": 0.7357, "step": 3971 }, { "epoch": 0.58, "grad_norm": 6.04726505279541, "learning_rate": 1.8787113547981481e-06, "loss": 0.7683, "step": 3972 }, { "epoch": 0.58, "grad_norm": 5.770578861236572, "learning_rate": 1.878635787048925e-06, "loss": 0.7136, "step": 3973 }, { "epoch": 0.58, "grad_norm": 5.349279880523682, "learning_rate": 1.87856019728691e-06, "loss": 0.662, "step": 3974 }, { "epoch": 0.58, "grad_norm": 5.219315528869629, "learning_rate": 1.8784845855139965e-06, "loss": 0.6387, "step": 3975 }, { "epoch": 0.58, "grad_norm": 6.524263381958008, "learning_rate": 1.8784089517320794e-06, "loss": 0.7036, "step": 3976 }, { "epoch": 0.58, "grad_norm": 6.171651363372803, "learning_rate": 1.878333295943053e-06, "loss": 0.8065, "step": 3977 }, { "epoch": 0.58, "grad_norm": 5.658811569213867, "learning_rate": 1.8782576181488134e-06, "loss": 0.7598, "step": 3978 }, { "epoch": 0.58, "grad_norm": 5.636837959289551, "learning_rate": 1.878181918351256e-06, "loss": 0.7025, "step": 3979 }, { "epoch": 0.58, "grad_norm": 6.022121429443359, "learning_rate": 1.8781061965522772e-06, "loss": 0.6851, "step": 3980 }, { "epoch": 0.58, "grad_norm": 5.428211212158203, "learning_rate": 1.8780304527537748e-06, "loss": 0.6601, "step": 3981 }, { "epoch": 0.58, "grad_norm": 5.523154258728027, "learning_rate": 1.8779546869576461e-06, "loss": 0.7411, "step": 3982 }, { "epoch": 0.58, "grad_norm": 5.815410614013672, "learning_rate": 1.877878899165789e-06, "loss": 0.7504, "step": 3983 }, { "epoch": 0.58, "grad_norm": 6.330442428588867, "learning_rate": 1.8778030893801024e-06, "loss": 0.7381, "step": 3984 }, { "epoch": 0.58, "grad_norm": 6.52655553817749, "learning_rate": 1.877727257602486e-06, "loss": 0.7769, "step": 3985 }, { "epoch": 0.58, "grad_norm": 5.456254005432129, "learning_rate": 1.8776514038348393e-06, "loss": 0.7116, "step": 3986 }, { "epoch": 0.58, "grad_norm": 6.259457111358643, "learning_rate": 1.8775755280790625e-06, "loss": 0.7517, "step": 3987 }, { "epoch": 0.58, "grad_norm": 6.166654586791992, "learning_rate": 1.877499630337057e-06, "loss": 0.6798, "step": 3988 }, { "epoch": 0.58, "grad_norm": 5.99932861328125, "learning_rate": 1.8774237106107237e-06, "loss": 0.708, "step": 3989 }, { "epoch": 0.58, "grad_norm": 7.701524257659912, "learning_rate": 1.8773477689019652e-06, "loss": 0.759, "step": 3990 }, { "epoch": 0.58, "grad_norm": 5.843959331512451, "learning_rate": 1.877271805212684e-06, "loss": 0.6981, "step": 3991 }, { "epoch": 0.58, "grad_norm": 5.490057468414307, "learning_rate": 1.877195819544783e-06, "loss": 0.7489, "step": 3992 }, { "epoch": 0.58, "grad_norm": 6.014688968658447, "learning_rate": 1.877119811900166e-06, "loss": 0.762, "step": 3993 }, { "epoch": 0.58, "grad_norm": 5.834746837615967, "learning_rate": 1.8770437822807374e-06, "loss": 0.7298, "step": 3994 }, { "epoch": 0.58, "grad_norm": 5.7418036460876465, "learning_rate": 1.8769677306884017e-06, "loss": 0.7826, "step": 3995 }, { "epoch": 0.58, "grad_norm": 7.074935436248779, "learning_rate": 1.8768916571250645e-06, "loss": 0.7468, "step": 3996 }, { "epoch": 0.58, "grad_norm": 5.863196849822998, "learning_rate": 1.8768155615926317e-06, "loss": 0.6366, "step": 3997 }, { "epoch": 0.58, "grad_norm": 5.516063690185547, "learning_rate": 1.8767394440930096e-06, "loss": 0.6285, "step": 3998 }, { "epoch": 0.58, "grad_norm": 5.547677040100098, "learning_rate": 1.8766633046281055e-06, "loss": 0.6904, "step": 3999 }, { "epoch": 0.58, "grad_norm": 5.497638702392578, "learning_rate": 1.8765871431998263e-06, "loss": 0.7241, "step": 4000 }, { "epoch": 0.58, "grad_norm": 5.8341965675354, "learning_rate": 1.876510959810081e-06, "loss": 0.753, "step": 4001 }, { "epoch": 0.58, "grad_norm": 5.580312252044678, "learning_rate": 1.8764347544607775e-06, "loss": 0.7332, "step": 4002 }, { "epoch": 0.58, "grad_norm": 5.947672367095947, "learning_rate": 1.8763585271538256e-06, "loss": 0.6382, "step": 4003 }, { "epoch": 0.58, "grad_norm": 6.19627571105957, "learning_rate": 1.8762822778911343e-06, "loss": 0.7404, "step": 4004 }, { "epoch": 0.58, "grad_norm": 6.218685626983643, "learning_rate": 1.8762060066746146e-06, "loss": 0.6706, "step": 4005 }, { "epoch": 0.58, "grad_norm": 5.61364221572876, "learning_rate": 1.876129713506177e-06, "loss": 0.7726, "step": 4006 }, { "epoch": 0.58, "grad_norm": 5.439868450164795, "learning_rate": 1.8760533983877332e-06, "loss": 0.7441, "step": 4007 }, { "epoch": 0.58, "grad_norm": 6.54960823059082, "learning_rate": 1.8759770613211945e-06, "loss": 0.7168, "step": 4008 }, { "epoch": 0.58, "grad_norm": 5.423747539520264, "learning_rate": 1.875900702308474e-06, "loss": 0.7264, "step": 4009 }, { "epoch": 0.58, "grad_norm": 5.147080898284912, "learning_rate": 1.875824321351485e-06, "loss": 0.697, "step": 4010 }, { "epoch": 0.58, "grad_norm": 5.919625282287598, "learning_rate": 1.8757479184521401e-06, "loss": 0.771, "step": 4011 }, { "epoch": 0.58, "grad_norm": 5.147916316986084, "learning_rate": 1.8756714936123546e-06, "loss": 0.7603, "step": 4012 }, { "epoch": 0.58, "grad_norm": 6.027675151824951, "learning_rate": 1.8755950468340423e-06, "loss": 0.7333, "step": 4013 }, { "epoch": 0.58, "grad_norm": 6.043112754821777, "learning_rate": 1.8755185781191188e-06, "loss": 0.7798, "step": 4014 }, { "epoch": 0.58, "grad_norm": 5.592620849609375, "learning_rate": 1.8754420874695e-06, "loss": 0.756, "step": 4015 }, { "epoch": 0.58, "grad_norm": 6.3035173416137695, "learning_rate": 1.875365574887102e-06, "loss": 0.7073, "step": 4016 }, { "epoch": 0.58, "grad_norm": 5.413515567779541, "learning_rate": 1.875289040373842e-06, "loss": 0.6791, "step": 4017 }, { "epoch": 0.58, "grad_norm": 6.606523036956787, "learning_rate": 1.8752124839316373e-06, "loss": 0.7023, "step": 4018 }, { "epoch": 0.58, "grad_norm": 6.20028829574585, "learning_rate": 1.8751359055624057e-06, "loss": 0.6753, "step": 4019 }, { "epoch": 0.58, "grad_norm": 5.299623489379883, "learning_rate": 1.8750593052680662e-06, "loss": 0.7072, "step": 4020 }, { "epoch": 0.58, "grad_norm": 5.30003547668457, "learning_rate": 1.8749826830505374e-06, "loss": 0.7395, "step": 4021 }, { "epoch": 0.58, "grad_norm": 5.4178948402404785, "learning_rate": 1.8749060389117392e-06, "loss": 0.7311, "step": 4022 }, { "epoch": 0.58, "grad_norm": 6.124995231628418, "learning_rate": 1.8748293728535918e-06, "loss": 0.7691, "step": 4023 }, { "epoch": 0.58, "grad_norm": 5.956221103668213, "learning_rate": 1.874752684878016e-06, "loss": 0.7402, "step": 4024 }, { "epoch": 0.58, "grad_norm": 5.516817569732666, "learning_rate": 1.8746759749869328e-06, "loss": 0.7433, "step": 4025 }, { "epoch": 0.58, "grad_norm": 6.717774391174316, "learning_rate": 1.8745992431822645e-06, "loss": 0.7533, "step": 4026 }, { "epoch": 0.58, "grad_norm": 5.677689075469971, "learning_rate": 1.874522489465933e-06, "loss": 0.7432, "step": 4027 }, { "epoch": 0.58, "grad_norm": 6.208409786224365, "learning_rate": 1.8744457138398615e-06, "loss": 0.7702, "step": 4028 }, { "epoch": 0.58, "grad_norm": 5.801358699798584, "learning_rate": 1.8743689163059736e-06, "loss": 0.6413, "step": 4029 }, { "epoch": 0.58, "grad_norm": 5.890720844268799, "learning_rate": 1.8742920968661934e-06, "loss": 0.7538, "step": 4030 }, { "epoch": 0.58, "grad_norm": 6.281124114990234, "learning_rate": 1.8742152555224448e-06, "loss": 0.7238, "step": 4031 }, { "epoch": 0.59, "grad_norm": 5.6102986335754395, "learning_rate": 1.8741383922766536e-06, "loss": 0.7465, "step": 4032 }, { "epoch": 0.59, "grad_norm": 5.9770708084106445, "learning_rate": 1.8740615071307455e-06, "loss": 0.771, "step": 4033 }, { "epoch": 0.59, "grad_norm": 5.308955669403076, "learning_rate": 1.8739846000866464e-06, "loss": 0.7325, "step": 4034 }, { "epoch": 0.59, "grad_norm": 5.61254358291626, "learning_rate": 1.8739076711462834e-06, "loss": 0.7597, "step": 4035 }, { "epoch": 0.59, "grad_norm": 5.60277795791626, "learning_rate": 1.8738307203115832e-06, "loss": 0.727, "step": 4036 }, { "epoch": 0.59, "grad_norm": 5.495035648345947, "learning_rate": 1.8737537475844745e-06, "loss": 0.7097, "step": 4037 }, { "epoch": 0.59, "grad_norm": 5.626792907714844, "learning_rate": 1.8736767529668853e-06, "loss": 0.6687, "step": 4038 }, { "epoch": 0.59, "grad_norm": 5.84913969039917, "learning_rate": 1.8735997364607445e-06, "loss": 0.7435, "step": 4039 }, { "epoch": 0.59, "grad_norm": 5.993988990783691, "learning_rate": 1.873522698067982e-06, "loss": 0.6348, "step": 4040 }, { "epoch": 0.59, "grad_norm": 5.416146755218506, "learning_rate": 1.8734456377905274e-06, "loss": 0.7169, "step": 4041 }, { "epoch": 0.59, "grad_norm": 6.4573211669921875, "learning_rate": 1.8733685556303116e-06, "loss": 0.7374, "step": 4042 }, { "epoch": 0.59, "grad_norm": 5.881393909454346, "learning_rate": 1.8732914515892657e-06, "loss": 0.6853, "step": 4043 }, { "epoch": 0.59, "grad_norm": 5.6639404296875, "learning_rate": 1.8732143256693215e-06, "loss": 0.7474, "step": 4044 }, { "epoch": 0.59, "grad_norm": 5.920217514038086, "learning_rate": 1.8731371778724113e-06, "loss": 0.7622, "step": 4045 }, { "epoch": 0.59, "grad_norm": 5.100179195404053, "learning_rate": 1.8730600082004676e-06, "loss": 0.6857, "step": 4046 }, { "epoch": 0.59, "grad_norm": 6.079025745391846, "learning_rate": 1.872982816655424e-06, "loss": 0.7585, "step": 4047 }, { "epoch": 0.59, "grad_norm": 6.954423904418945, "learning_rate": 1.8729056032392144e-06, "loss": 0.7955, "step": 4048 }, { "epoch": 0.59, "grad_norm": 5.301459789276123, "learning_rate": 1.8728283679537733e-06, "loss": 0.6193, "step": 4049 }, { "epoch": 0.59, "grad_norm": 6.357800006866455, "learning_rate": 1.8727511108010354e-06, "loss": 0.7967, "step": 4050 }, { "epoch": 0.59, "grad_norm": 5.947714805603027, "learning_rate": 1.8726738317829368e-06, "loss": 0.6499, "step": 4051 }, { "epoch": 0.59, "grad_norm": 5.758392810821533, "learning_rate": 1.872596530901413e-06, "loss": 0.7533, "step": 4052 }, { "epoch": 0.59, "grad_norm": 5.619483470916748, "learning_rate": 1.8725192081584012e-06, "loss": 0.6555, "step": 4053 }, { "epoch": 0.59, "grad_norm": 5.530156135559082, "learning_rate": 1.872441863555838e-06, "loss": 0.7382, "step": 4054 }, { "epoch": 0.59, "grad_norm": 5.351165294647217, "learning_rate": 1.872364497095662e-06, "loss": 0.7347, "step": 4055 }, { "epoch": 0.59, "grad_norm": 6.111390113830566, "learning_rate": 1.8722871087798106e-06, "loss": 0.6911, "step": 4056 }, { "epoch": 0.59, "grad_norm": 5.954155921936035, "learning_rate": 1.8722096986102232e-06, "loss": 0.7384, "step": 4057 }, { "epoch": 0.59, "grad_norm": 6.007813930511475, "learning_rate": 1.8721322665888389e-06, "loss": 0.7402, "step": 4058 }, { "epoch": 0.59, "grad_norm": 5.331148624420166, "learning_rate": 1.8720548127175977e-06, "loss": 0.67, "step": 4059 }, { "epoch": 0.59, "grad_norm": 6.284346580505371, "learning_rate": 1.8719773369984399e-06, "loss": 0.7693, "step": 4060 }, { "epoch": 0.59, "grad_norm": 6.457026958465576, "learning_rate": 1.8718998394333073e-06, "loss": 0.714, "step": 4061 }, { "epoch": 0.59, "grad_norm": 6.800205230712891, "learning_rate": 1.8718223200241403e-06, "loss": 0.8805, "step": 4062 }, { "epoch": 0.59, "grad_norm": 6.147602558135986, "learning_rate": 1.871744778772882e-06, "loss": 0.8244, "step": 4063 }, { "epoch": 0.59, "grad_norm": 6.264253616333008, "learning_rate": 1.8716672156814744e-06, "loss": 0.7451, "step": 4064 }, { "epoch": 0.59, "grad_norm": 6.087939739227295, "learning_rate": 1.8715896307518615e-06, "loss": 0.7771, "step": 4065 }, { "epoch": 0.59, "grad_norm": 6.565136909484863, "learning_rate": 1.871512023985986e-06, "loss": 0.7703, "step": 4066 }, { "epoch": 0.59, "grad_norm": 5.477804183959961, "learning_rate": 1.8714343953857932e-06, "loss": 0.8057, "step": 4067 }, { "epoch": 0.59, "grad_norm": 6.676615238189697, "learning_rate": 1.8713567449532276e-06, "loss": 0.7724, "step": 4068 }, { "epoch": 0.59, "grad_norm": 5.8039326667785645, "learning_rate": 1.8712790726902343e-06, "loss": 0.6656, "step": 4069 }, { "epoch": 0.59, "grad_norm": 4.857898712158203, "learning_rate": 1.8712013785987596e-06, "loss": 0.6769, "step": 4070 }, { "epoch": 0.59, "grad_norm": 6.600018501281738, "learning_rate": 1.87112366268075e-06, "loss": 0.7905, "step": 4071 }, { "epoch": 0.59, "grad_norm": 5.691854476928711, "learning_rate": 1.8710459249381522e-06, "loss": 0.7588, "step": 4072 }, { "epoch": 0.59, "grad_norm": 5.595340251922607, "learning_rate": 1.8709681653729143e-06, "loss": 0.6994, "step": 4073 }, { "epoch": 0.59, "grad_norm": 6.584977626800537, "learning_rate": 1.8708903839869843e-06, "loss": 0.7341, "step": 4074 }, { "epoch": 0.59, "grad_norm": 5.945180892944336, "learning_rate": 1.8708125807823106e-06, "loss": 0.7203, "step": 4075 }, { "epoch": 0.59, "grad_norm": 5.896573066711426, "learning_rate": 1.8707347557608428e-06, "loss": 0.6807, "step": 4076 }, { "epoch": 0.59, "grad_norm": 5.438932418823242, "learning_rate": 1.87065690892453e-06, "loss": 0.7046, "step": 4077 }, { "epoch": 0.59, "grad_norm": 6.418335914611816, "learning_rate": 1.8705790402753237e-06, "loss": 0.7093, "step": 4078 }, { "epoch": 0.59, "grad_norm": 6.049032211303711, "learning_rate": 1.8705011498151735e-06, "loss": 0.8299, "step": 4079 }, { "epoch": 0.59, "grad_norm": 5.687956809997559, "learning_rate": 1.8704232375460318e-06, "loss": 0.7611, "step": 4080 }, { "epoch": 0.59, "grad_norm": 5.382532119750977, "learning_rate": 1.87034530346985e-06, "loss": 0.7003, "step": 4081 }, { "epoch": 0.59, "grad_norm": 5.785070419311523, "learning_rate": 1.8702673475885807e-06, "loss": 0.6979, "step": 4082 }, { "epoch": 0.59, "grad_norm": 5.7478461265563965, "learning_rate": 1.8701893699041774e-06, "loss": 0.698, "step": 4083 }, { "epoch": 0.59, "grad_norm": 5.432611465454102, "learning_rate": 1.8701113704185927e-06, "loss": 0.7013, "step": 4084 }, { "epoch": 0.59, "grad_norm": 5.886168003082275, "learning_rate": 1.870033349133782e-06, "loss": 0.8225, "step": 4085 }, { "epoch": 0.59, "grad_norm": 6.322279930114746, "learning_rate": 1.869955306051699e-06, "loss": 0.7488, "step": 4086 }, { "epoch": 0.59, "grad_norm": 5.014490604400635, "learning_rate": 1.8698772411742995e-06, "loss": 0.6788, "step": 4087 }, { "epoch": 0.59, "grad_norm": 6.039393424987793, "learning_rate": 1.8697991545035393e-06, "loss": 0.7729, "step": 4088 }, { "epoch": 0.59, "grad_norm": 5.632879257202148, "learning_rate": 1.8697210460413744e-06, "loss": 0.7514, "step": 4089 }, { "epoch": 0.59, "grad_norm": 5.597829818725586, "learning_rate": 1.8696429157897617e-06, "loss": 0.6859, "step": 4090 }, { "epoch": 0.59, "grad_norm": 5.372077465057373, "learning_rate": 1.8695647637506588e-06, "loss": 0.762, "step": 4091 }, { "epoch": 0.59, "grad_norm": 6.099630355834961, "learning_rate": 1.8694865899260236e-06, "loss": 0.7558, "step": 4092 }, { "epoch": 0.59, "grad_norm": 5.342698574066162, "learning_rate": 1.8694083943178152e-06, "loss": 0.72, "step": 4093 }, { "epoch": 0.59, "grad_norm": 5.488150596618652, "learning_rate": 1.8693301769279915e-06, "loss": 0.8129, "step": 4094 }, { "epoch": 0.59, "grad_norm": 5.888788223266602, "learning_rate": 1.869251937758513e-06, "loss": 0.7645, "step": 4095 }, { "epoch": 0.59, "grad_norm": 5.873970985412598, "learning_rate": 1.8691736768113395e-06, "loss": 0.7257, "step": 4096 }, { "epoch": 0.59, "grad_norm": 5.460257530212402, "learning_rate": 1.8690953940884318e-06, "loss": 0.7331, "step": 4097 }, { "epoch": 0.59, "grad_norm": 5.622230529785156, "learning_rate": 1.8690170895917513e-06, "loss": 0.6893, "step": 4098 }, { "epoch": 0.59, "grad_norm": 6.575131416320801, "learning_rate": 1.8689387633232592e-06, "loss": 0.8148, "step": 4099 }, { "epoch": 0.59, "grad_norm": 5.468935966491699, "learning_rate": 1.8688604152849185e-06, "loss": 0.7011, "step": 4100 }, { "epoch": 0.6, "grad_norm": 5.751397609710693, "learning_rate": 1.868782045478692e-06, "loss": 0.7472, "step": 4101 }, { "epoch": 0.6, "grad_norm": 6.556057453155518, "learning_rate": 1.8687036539065427e-06, "loss": 0.7158, "step": 4102 }, { "epoch": 0.6, "grad_norm": 5.240035533905029, "learning_rate": 1.8686252405704353e-06, "loss": 0.6892, "step": 4103 }, { "epoch": 0.6, "grad_norm": 5.541283130645752, "learning_rate": 1.8685468054723334e-06, "loss": 0.7716, "step": 4104 }, { "epoch": 0.6, "grad_norm": 6.06993293762207, "learning_rate": 1.8684683486142028e-06, "loss": 0.8436, "step": 4105 }, { "epoch": 0.6, "grad_norm": 5.942441940307617, "learning_rate": 1.8683898699980087e-06, "loss": 0.7451, "step": 4106 }, { "epoch": 0.6, "grad_norm": 5.129802227020264, "learning_rate": 1.8683113696257172e-06, "loss": 0.6714, "step": 4107 }, { "epoch": 0.6, "grad_norm": 5.732802867889404, "learning_rate": 1.8682328474992954e-06, "loss": 0.7401, "step": 4108 }, { "epoch": 0.6, "grad_norm": 5.565478324890137, "learning_rate": 1.8681543036207103e-06, "loss": 0.7118, "step": 4109 }, { "epoch": 0.6, "grad_norm": 5.8329758644104, "learning_rate": 1.86807573799193e-06, "loss": 0.7554, "step": 4110 }, { "epoch": 0.6, "grad_norm": 5.4220194816589355, "learning_rate": 1.8679971506149224e-06, "loss": 0.7229, "step": 4111 }, { "epoch": 0.6, "grad_norm": 5.716482639312744, "learning_rate": 1.8679185414916562e-06, "loss": 0.6438, "step": 4112 }, { "epoch": 0.6, "grad_norm": 5.551325798034668, "learning_rate": 1.8678399106241014e-06, "loss": 0.7665, "step": 4113 }, { "epoch": 0.6, "grad_norm": 5.74342155456543, "learning_rate": 1.8677612580142278e-06, "loss": 0.6992, "step": 4114 }, { "epoch": 0.6, "grad_norm": 4.980483531951904, "learning_rate": 1.867682583664006e-06, "loss": 0.7046, "step": 4115 }, { "epoch": 0.6, "grad_norm": 5.692668437957764, "learning_rate": 1.8676038875754068e-06, "loss": 0.7102, "step": 4116 }, { "epoch": 0.6, "grad_norm": 5.40993070602417, "learning_rate": 1.8675251697504018e-06, "loss": 0.7254, "step": 4117 }, { "epoch": 0.6, "grad_norm": 5.999715328216553, "learning_rate": 1.8674464301909631e-06, "loss": 0.6994, "step": 4118 }, { "epoch": 0.6, "grad_norm": 6.18556022644043, "learning_rate": 1.8673676688990638e-06, "loss": 0.7416, "step": 4119 }, { "epoch": 0.6, "grad_norm": 5.730928421020508, "learning_rate": 1.8672888858766767e-06, "loss": 0.6683, "step": 4120 }, { "epoch": 0.6, "grad_norm": 6.739145755767822, "learning_rate": 1.8672100811257758e-06, "loss": 0.7882, "step": 4121 }, { "epoch": 0.6, "grad_norm": 5.319664001464844, "learning_rate": 1.8671312546483355e-06, "loss": 0.6852, "step": 4122 }, { "epoch": 0.6, "grad_norm": 5.8333420753479, "learning_rate": 1.8670524064463305e-06, "loss": 0.6661, "step": 4123 }, { "epoch": 0.6, "grad_norm": 5.957159519195557, "learning_rate": 1.8669735365217363e-06, "loss": 0.7945, "step": 4124 }, { "epoch": 0.6, "grad_norm": 5.953758239746094, "learning_rate": 1.8668946448765289e-06, "loss": 0.7357, "step": 4125 }, { "epoch": 0.6, "grad_norm": 5.392929553985596, "learning_rate": 1.8668157315126845e-06, "loss": 0.6862, "step": 4126 }, { "epoch": 0.6, "grad_norm": 5.268500804901123, "learning_rate": 1.8667367964321805e-06, "loss": 0.7548, "step": 4127 }, { "epoch": 0.6, "grad_norm": 5.4635210037231445, "learning_rate": 1.8666578396369943e-06, "loss": 0.6745, "step": 4128 }, { "epoch": 0.6, "grad_norm": 5.512673854827881, "learning_rate": 1.866578861129104e-06, "loss": 0.5903, "step": 4129 }, { "epoch": 0.6, "grad_norm": 6.135291576385498, "learning_rate": 1.8664998609104888e-06, "loss": 0.7369, "step": 4130 }, { "epoch": 0.6, "grad_norm": 5.514735698699951, "learning_rate": 1.866420838983127e-06, "loss": 0.7221, "step": 4131 }, { "epoch": 0.6, "grad_norm": 5.412596225738525, "learning_rate": 1.8663417953489993e-06, "loss": 0.6855, "step": 4132 }, { "epoch": 0.6, "grad_norm": 5.986188888549805, "learning_rate": 1.8662627300100853e-06, "loss": 0.7639, "step": 4133 }, { "epoch": 0.6, "grad_norm": 4.855849266052246, "learning_rate": 1.8661836429683662e-06, "loss": 0.6786, "step": 4134 }, { "epoch": 0.6, "grad_norm": 5.692517280578613, "learning_rate": 1.8661045342258234e-06, "loss": 0.7535, "step": 4135 }, { "epoch": 0.6, "grad_norm": 5.740110874176025, "learning_rate": 1.8660254037844386e-06, "loss": 0.8031, "step": 4136 }, { "epoch": 0.6, "grad_norm": 5.601212501525879, "learning_rate": 1.8659462516461946e-06, "loss": 0.7734, "step": 4137 }, { "epoch": 0.6, "grad_norm": 5.854885578155518, "learning_rate": 1.8658670778130742e-06, "loss": 0.7468, "step": 4138 }, { "epoch": 0.6, "grad_norm": 5.589875221252441, "learning_rate": 1.865787882287061e-06, "loss": 0.7094, "step": 4139 }, { "epoch": 0.6, "grad_norm": 5.525818824768066, "learning_rate": 1.8657086650701392e-06, "loss": 0.6776, "step": 4140 }, { "epoch": 0.6, "grad_norm": 5.160012245178223, "learning_rate": 1.8656294261642933e-06, "loss": 0.6548, "step": 4141 }, { "epoch": 0.6, "grad_norm": 5.517565727233887, "learning_rate": 1.8655501655715092e-06, "loss": 0.6514, "step": 4142 }, { "epoch": 0.6, "grad_norm": 5.427781581878662, "learning_rate": 1.8654708832937715e-06, "loss": 0.7747, "step": 4143 }, { "epoch": 0.6, "grad_norm": 5.359044075012207, "learning_rate": 1.865391579333067e-06, "loss": 0.7733, "step": 4144 }, { "epoch": 0.6, "grad_norm": 5.18417501449585, "learning_rate": 1.865312253691383e-06, "loss": 0.6629, "step": 4145 }, { "epoch": 0.6, "grad_norm": 6.03062105178833, "learning_rate": 1.8652329063707062e-06, "loss": 0.7223, "step": 4146 }, { "epoch": 0.6, "grad_norm": 5.652655601501465, "learning_rate": 1.865153537373025e-06, "loss": 0.7257, "step": 4147 }, { "epoch": 0.6, "grad_norm": 6.613595485687256, "learning_rate": 1.8650741467003274e-06, "loss": 0.7261, "step": 4148 }, { "epoch": 0.6, "grad_norm": 5.992877006530762, "learning_rate": 1.8649947343546028e-06, "loss": 0.755, "step": 4149 }, { "epoch": 0.6, "grad_norm": 6.293062210083008, "learning_rate": 1.8649153003378405e-06, "loss": 0.6718, "step": 4150 }, { "epoch": 0.6, "grad_norm": 6.306310176849365, "learning_rate": 1.8648358446520308e-06, "loss": 0.7179, "step": 4151 }, { "epoch": 0.6, "grad_norm": 6.372134685516357, "learning_rate": 1.8647563672991641e-06, "loss": 0.7839, "step": 4152 }, { "epoch": 0.6, "grad_norm": 5.4845709800720215, "learning_rate": 1.864676868281232e-06, "loss": 0.6982, "step": 4153 }, { "epoch": 0.6, "grad_norm": 6.172854900360107, "learning_rate": 1.8645973476002257e-06, "loss": 0.716, "step": 4154 }, { "epoch": 0.6, "grad_norm": 5.823784351348877, "learning_rate": 1.8645178052581375e-06, "loss": 0.6607, "step": 4155 }, { "epoch": 0.6, "grad_norm": 5.679791450500488, "learning_rate": 1.8644382412569608e-06, "loss": 0.6844, "step": 4156 }, { "epoch": 0.6, "grad_norm": 6.004232406616211, "learning_rate": 1.8643586555986883e-06, "loss": 0.7203, "step": 4157 }, { "epoch": 0.6, "grad_norm": 5.301802158355713, "learning_rate": 1.864279048285314e-06, "loss": 0.7796, "step": 4158 }, { "epoch": 0.6, "grad_norm": 5.4258623123168945, "learning_rate": 1.8641994193188327e-06, "loss": 0.8088, "step": 4159 }, { "epoch": 0.6, "grad_norm": 5.990303993225098, "learning_rate": 1.8641197687012389e-06, "loss": 0.764, "step": 4160 }, { "epoch": 0.6, "grad_norm": 6.094046115875244, "learning_rate": 1.8640400964345284e-06, "loss": 0.7304, "step": 4161 }, { "epoch": 0.6, "grad_norm": 5.25263786315918, "learning_rate": 1.8639604025206972e-06, "loss": 0.7505, "step": 4162 }, { "epoch": 0.6, "grad_norm": 6.075364589691162, "learning_rate": 1.863880686961742e-06, "loss": 0.7222, "step": 4163 }, { "epoch": 0.6, "grad_norm": 5.7475433349609375, "learning_rate": 1.8638009497596596e-06, "loss": 0.6674, "step": 4164 }, { "epoch": 0.6, "grad_norm": 6.73552942276001, "learning_rate": 1.8637211909164484e-06, "loss": 0.6874, "step": 4165 }, { "epoch": 0.6, "grad_norm": 5.374031066894531, "learning_rate": 1.8636414104341056e-06, "loss": 0.7375, "step": 4166 }, { "epoch": 0.6, "grad_norm": 5.826013565063477, "learning_rate": 1.863561608314631e-06, "loss": 0.694, "step": 4167 }, { "epoch": 0.6, "grad_norm": 6.056952476501465, "learning_rate": 1.8634817845600233e-06, "loss": 0.7381, "step": 4168 }, { "epoch": 0.6, "grad_norm": 5.087046146392822, "learning_rate": 1.8634019391722822e-06, "loss": 0.708, "step": 4169 }, { "epoch": 0.61, "grad_norm": 5.469954013824463, "learning_rate": 1.8633220721534087e-06, "loss": 0.6493, "step": 4170 }, { "epoch": 0.61, "grad_norm": 5.552499294281006, "learning_rate": 1.8632421835054032e-06, "loss": 0.7858, "step": 4171 }, { "epoch": 0.61, "grad_norm": 6.149993419647217, "learning_rate": 1.8631622732302677e-06, "loss": 0.7225, "step": 4172 }, { "epoch": 0.61, "grad_norm": 5.337196350097656, "learning_rate": 1.863082341330004e-06, "loss": 0.6574, "step": 4173 }, { "epoch": 0.61, "grad_norm": 5.794849395751953, "learning_rate": 1.8630023878066143e-06, "loss": 0.8278, "step": 4174 }, { "epoch": 0.61, "grad_norm": 5.85410737991333, "learning_rate": 1.8629224126621021e-06, "loss": 0.6219, "step": 4175 }, { "epoch": 0.61, "grad_norm": 6.017576694488525, "learning_rate": 1.862842415898471e-06, "loss": 0.7315, "step": 4176 }, { "epoch": 0.61, "grad_norm": 6.522163391113281, "learning_rate": 1.8627623975177254e-06, "loss": 0.7161, "step": 4177 }, { "epoch": 0.61, "grad_norm": 5.279333591461182, "learning_rate": 1.8626823575218693e-06, "loss": 0.6399, "step": 4178 }, { "epoch": 0.61, "grad_norm": 5.285735607147217, "learning_rate": 1.8626022959129085e-06, "loss": 0.6787, "step": 4179 }, { "epoch": 0.61, "grad_norm": 5.2094011306762695, "learning_rate": 1.862522212692849e-06, "loss": 0.7344, "step": 4180 }, { "epoch": 0.61, "grad_norm": 6.40161657333374, "learning_rate": 1.8624421078636967e-06, "loss": 0.7636, "step": 4181 }, { "epoch": 0.61, "grad_norm": 5.453457355499268, "learning_rate": 1.862361981427459e-06, "loss": 0.6607, "step": 4182 }, { "epoch": 0.61, "grad_norm": 5.664764404296875, "learning_rate": 1.862281833386143e-06, "loss": 0.7092, "step": 4183 }, { "epoch": 0.61, "grad_norm": 5.294003486633301, "learning_rate": 1.8622016637417564e-06, "loss": 0.6671, "step": 4184 }, { "epoch": 0.61, "grad_norm": 5.279654502868652, "learning_rate": 1.8621214724963083e-06, "loss": 0.7377, "step": 4185 }, { "epoch": 0.61, "grad_norm": 6.3284759521484375, "learning_rate": 1.8620412596518073e-06, "loss": 0.8308, "step": 4186 }, { "epoch": 0.61, "grad_norm": 5.356139183044434, "learning_rate": 1.8619610252102635e-06, "loss": 0.6645, "step": 4187 }, { "epoch": 0.61, "grad_norm": 5.5352606773376465, "learning_rate": 1.8618807691736866e-06, "loss": 0.7672, "step": 4188 }, { "epoch": 0.61, "grad_norm": 5.345091819763184, "learning_rate": 1.8618004915440873e-06, "loss": 0.7063, "step": 4189 }, { "epoch": 0.61, "grad_norm": 5.22318696975708, "learning_rate": 1.861720192323477e-06, "loss": 0.7321, "step": 4190 }, { "epoch": 0.61, "grad_norm": 5.8897881507873535, "learning_rate": 1.8616398715138676e-06, "loss": 0.7059, "step": 4191 }, { "epoch": 0.61, "grad_norm": 5.781179428100586, "learning_rate": 1.8615595291172712e-06, "loss": 0.7788, "step": 4192 }, { "epoch": 0.61, "grad_norm": 5.662970066070557, "learning_rate": 1.8614791651357006e-06, "loss": 0.6842, "step": 4193 }, { "epoch": 0.61, "grad_norm": 5.23069429397583, "learning_rate": 1.8613987795711693e-06, "loss": 0.6618, "step": 4194 }, { "epoch": 0.61, "grad_norm": 5.667871475219727, "learning_rate": 1.8613183724256912e-06, "loss": 0.7434, "step": 4195 }, { "epoch": 0.61, "grad_norm": 6.345205307006836, "learning_rate": 1.861237943701281e-06, "loss": 0.7699, "step": 4196 }, { "epoch": 0.61, "grad_norm": 6.275737285614014, "learning_rate": 1.8611574933999532e-06, "loss": 0.7285, "step": 4197 }, { "epoch": 0.61, "grad_norm": 5.773622989654541, "learning_rate": 1.8610770215237239e-06, "loss": 0.6882, "step": 4198 }, { "epoch": 0.61, "grad_norm": 5.591474533081055, "learning_rate": 1.8609965280746085e-06, "loss": 0.738, "step": 4199 }, { "epoch": 0.61, "grad_norm": 6.1261820793151855, "learning_rate": 1.8609160130546245e-06, "loss": 0.7474, "step": 4200 }, { "epoch": 0.61, "grad_norm": 6.179828643798828, "learning_rate": 1.8608354764657883e-06, "loss": 0.7173, "step": 4201 }, { "epoch": 0.61, "grad_norm": 5.708134174346924, "learning_rate": 1.860754918310118e-06, "loss": 0.6753, "step": 4202 }, { "epoch": 0.61, "grad_norm": 5.675119876861572, "learning_rate": 1.8606743385896322e-06, "loss": 0.6804, "step": 4203 }, { "epoch": 0.61, "grad_norm": 5.438842296600342, "learning_rate": 1.860593737306349e-06, "loss": 0.767, "step": 4204 }, { "epoch": 0.61, "grad_norm": 5.411402225494385, "learning_rate": 1.8605131144622879e-06, "loss": 0.7255, "step": 4205 }, { "epoch": 0.61, "grad_norm": 5.882466793060303, "learning_rate": 1.860432470059469e-06, "loss": 0.6915, "step": 4206 }, { "epoch": 0.61, "grad_norm": 6.297750949859619, "learning_rate": 1.8603518040999127e-06, "loss": 0.8059, "step": 4207 }, { "epoch": 0.61, "grad_norm": 5.649979591369629, "learning_rate": 1.8602711165856397e-06, "loss": 0.7558, "step": 4208 }, { "epoch": 0.61, "grad_norm": 5.438479900360107, "learning_rate": 1.8601904075186718e-06, "loss": 0.6691, "step": 4209 }, { "epoch": 0.61, "grad_norm": 5.373255729675293, "learning_rate": 1.860109676901031e-06, "loss": 0.7311, "step": 4210 }, { "epoch": 0.61, "grad_norm": 6.009342670440674, "learning_rate": 1.8600289247347395e-06, "loss": 0.6407, "step": 4211 }, { "epoch": 0.61, "grad_norm": 5.876982688903809, "learning_rate": 1.8599481510218209e-06, "loss": 0.7531, "step": 4212 }, { "epoch": 0.61, "grad_norm": 5.1835713386535645, "learning_rate": 1.8598673557642988e-06, "loss": 0.6715, "step": 4213 }, { "epoch": 0.61, "grad_norm": 5.724349498748779, "learning_rate": 1.859786538964197e-06, "loss": 0.7631, "step": 4214 }, { "epoch": 0.61, "grad_norm": 5.763089179992676, "learning_rate": 1.8597057006235404e-06, "loss": 0.6825, "step": 4215 }, { "epoch": 0.61, "grad_norm": 6.576652526855469, "learning_rate": 1.8596248407443546e-06, "loss": 0.8138, "step": 4216 }, { "epoch": 0.61, "grad_norm": 5.793856620788574, "learning_rate": 1.8595439593286647e-06, "loss": 0.7214, "step": 4217 }, { "epoch": 0.61, "grad_norm": 5.260420799255371, "learning_rate": 1.8594630563784981e-06, "loss": 0.7307, "step": 4218 }, { "epoch": 0.61, "grad_norm": 5.803476810455322, "learning_rate": 1.859382131895881e-06, "loss": 0.7445, "step": 4219 }, { "epoch": 0.61, "grad_norm": 5.397255897521973, "learning_rate": 1.8593011858828405e-06, "loss": 0.7161, "step": 4220 }, { "epoch": 0.61, "grad_norm": 5.544901371002197, "learning_rate": 1.8592202183414055e-06, "loss": 0.772, "step": 4221 }, { "epoch": 0.61, "grad_norm": 6.009284973144531, "learning_rate": 1.8591392292736035e-06, "loss": 0.8026, "step": 4222 }, { "epoch": 0.61, "grad_norm": 5.521339416503906, "learning_rate": 1.8590582186814644e-06, "loss": 0.7039, "step": 4223 }, { "epoch": 0.61, "grad_norm": 5.826940059661865, "learning_rate": 1.8589771865670174e-06, "loss": 0.7484, "step": 4224 }, { "epoch": 0.61, "grad_norm": 5.9943742752075195, "learning_rate": 1.8588961329322928e-06, "loss": 0.7074, "step": 4225 }, { "epoch": 0.61, "grad_norm": 5.250406265258789, "learning_rate": 1.8588150577793211e-06, "loss": 0.6954, "step": 4226 }, { "epoch": 0.61, "grad_norm": 5.558953285217285, "learning_rate": 1.8587339611101333e-06, "loss": 0.7055, "step": 4227 }, { "epoch": 0.61, "grad_norm": 5.7366557121276855, "learning_rate": 1.8586528429267615e-06, "loss": 0.7208, "step": 4228 }, { "epoch": 0.61, "grad_norm": 5.333653926849365, "learning_rate": 1.8585717032312381e-06, "loss": 0.6837, "step": 4229 }, { "epoch": 0.61, "grad_norm": 5.051896572113037, "learning_rate": 1.8584905420255957e-06, "loss": 0.6601, "step": 4230 }, { "epoch": 0.61, "grad_norm": 5.146703720092773, "learning_rate": 1.8584093593118672e-06, "loss": 0.7304, "step": 4231 }, { "epoch": 0.61, "grad_norm": 5.46462869644165, "learning_rate": 1.8583281550920878e-06, "loss": 0.7978, "step": 4232 }, { "epoch": 0.61, "grad_norm": 5.302628040313721, "learning_rate": 1.8582469293682905e-06, "loss": 0.6999, "step": 4233 }, { "epoch": 0.61, "grad_norm": 5.629015922546387, "learning_rate": 1.8581656821425108e-06, "loss": 0.7512, "step": 4234 }, { "epoch": 0.61, "grad_norm": 5.806324481964111, "learning_rate": 1.8580844134167846e-06, "loss": 0.7395, "step": 4235 }, { "epoch": 0.61, "grad_norm": 5.8188018798828125, "learning_rate": 1.8580031231931477e-06, "loss": 0.7346, "step": 4236 }, { "epoch": 0.61, "grad_norm": 6.399265766143799, "learning_rate": 1.8579218114736363e-06, "loss": 0.8474, "step": 4237 }, { "epoch": 0.61, "grad_norm": 5.597201824188232, "learning_rate": 1.8578404782602883e-06, "loss": 0.6575, "step": 4238 }, { "epoch": 0.62, "grad_norm": 5.816374778747559, "learning_rate": 1.8577591235551408e-06, "loss": 0.6586, "step": 4239 }, { "epoch": 0.62, "grad_norm": 5.888741493225098, "learning_rate": 1.8576777473602322e-06, "loss": 0.7887, "step": 4240 }, { "epoch": 0.62, "grad_norm": 5.582283973693848, "learning_rate": 1.8575963496776012e-06, "loss": 0.7544, "step": 4241 }, { "epoch": 0.62, "grad_norm": 5.745999336242676, "learning_rate": 1.857514930509287e-06, "loss": 0.6695, "step": 4242 }, { "epoch": 0.62, "grad_norm": 5.634275436401367, "learning_rate": 1.8574334898573295e-06, "loss": 0.6671, "step": 4243 }, { "epoch": 0.62, "grad_norm": 5.764673709869385, "learning_rate": 1.8573520277237695e-06, "loss": 0.7612, "step": 4244 }, { "epoch": 0.62, "grad_norm": 6.002939224243164, "learning_rate": 1.8572705441106471e-06, "loss": 0.6838, "step": 4245 }, { "epoch": 0.62, "grad_norm": 7.040661334991455, "learning_rate": 1.8571890390200045e-06, "loss": 0.754, "step": 4246 }, { "epoch": 0.62, "grad_norm": 5.840428352355957, "learning_rate": 1.857107512453883e-06, "loss": 0.7322, "step": 4247 }, { "epoch": 0.62, "grad_norm": 5.742726802825928, "learning_rate": 1.8570259644143254e-06, "loss": 0.7879, "step": 4248 }, { "epoch": 0.62, "grad_norm": 6.007403373718262, "learning_rate": 1.8569443949033749e-06, "loss": 0.7499, "step": 4249 }, { "epoch": 0.62, "grad_norm": 6.372847080230713, "learning_rate": 1.856862803923075e-06, "loss": 0.7866, "step": 4250 }, { "epoch": 0.62, "grad_norm": 5.581432342529297, "learning_rate": 1.8567811914754699e-06, "loss": 0.6988, "step": 4251 }, { "epoch": 0.62, "grad_norm": 6.290899276733398, "learning_rate": 1.8566995575626039e-06, "loss": 0.801, "step": 4252 }, { "epoch": 0.62, "grad_norm": 5.6163716316223145, "learning_rate": 1.8566179021865227e-06, "loss": 0.6998, "step": 4253 }, { "epoch": 0.62, "grad_norm": 5.979979038238525, "learning_rate": 1.8565362253492716e-06, "loss": 0.7436, "step": 4254 }, { "epoch": 0.62, "grad_norm": 5.4812188148498535, "learning_rate": 1.856454527052897e-06, "loss": 0.7011, "step": 4255 }, { "epoch": 0.62, "grad_norm": 5.782737731933594, "learning_rate": 1.8563728072994463e-06, "loss": 0.7389, "step": 4256 }, { "epoch": 0.62, "grad_norm": 6.2337846755981445, "learning_rate": 1.8562910660909662e-06, "loss": 0.7875, "step": 4257 }, { "epoch": 0.62, "grad_norm": 5.84357213973999, "learning_rate": 1.8562093034295042e-06, "loss": 0.7777, "step": 4258 }, { "epoch": 0.62, "grad_norm": 5.175202369689941, "learning_rate": 1.85612751931711e-06, "loss": 0.7795, "step": 4259 }, { "epoch": 0.62, "grad_norm": 6.5058674812316895, "learning_rate": 1.8560457137558317e-06, "loss": 0.7474, "step": 4260 }, { "epoch": 0.62, "grad_norm": 5.488801956176758, "learning_rate": 1.8559638867477187e-06, "loss": 0.7319, "step": 4261 }, { "epoch": 0.62, "grad_norm": 6.122838497161865, "learning_rate": 1.8558820382948216e-06, "loss": 0.6857, "step": 4262 }, { "epoch": 0.62, "grad_norm": 5.642736434936523, "learning_rate": 1.8558001683991908e-06, "loss": 0.7371, "step": 4263 }, { "epoch": 0.62, "grad_norm": 5.914244651794434, "learning_rate": 1.855718277062877e-06, "loss": 0.7106, "step": 4264 }, { "epoch": 0.62, "grad_norm": 5.604862213134766, "learning_rate": 1.8556363642879322e-06, "loss": 0.7832, "step": 4265 }, { "epoch": 0.62, "grad_norm": 6.0812249183654785, "learning_rate": 1.8555544300764089e-06, "loss": 0.7548, "step": 4266 }, { "epoch": 0.62, "grad_norm": 5.844132423400879, "learning_rate": 1.8554724744303592e-06, "loss": 0.7502, "step": 4267 }, { "epoch": 0.62, "grad_norm": 5.864034652709961, "learning_rate": 1.8553904973518365e-06, "loss": 0.7284, "step": 4268 }, { "epoch": 0.62, "grad_norm": 5.951769828796387, "learning_rate": 1.855308498842895e-06, "loss": 0.6854, "step": 4269 }, { "epoch": 0.62, "grad_norm": 5.437923908233643, "learning_rate": 1.8552264789055887e-06, "loss": 0.6598, "step": 4270 }, { "epoch": 0.62, "grad_norm": 5.467426300048828, "learning_rate": 1.8551444375419725e-06, "loss": 0.7617, "step": 4271 }, { "epoch": 0.62, "grad_norm": 5.702757835388184, "learning_rate": 1.8550623747541021e-06, "loss": 0.7572, "step": 4272 }, { "epoch": 0.62, "grad_norm": 5.511171340942383, "learning_rate": 1.854980290544033e-06, "loss": 0.7096, "step": 4273 }, { "epoch": 0.62, "grad_norm": 5.472903251647949, "learning_rate": 1.8548981849138221e-06, "loss": 0.7119, "step": 4274 }, { "epoch": 0.62, "grad_norm": 6.005753993988037, "learning_rate": 1.8548160578655261e-06, "loss": 0.7556, "step": 4275 }, { "epoch": 0.62, "grad_norm": 5.38547945022583, "learning_rate": 1.8547339094012027e-06, "loss": 0.7193, "step": 4276 }, { "epoch": 0.62, "grad_norm": 5.505527973175049, "learning_rate": 1.8546517395229102e-06, "loss": 0.7679, "step": 4277 }, { "epoch": 0.62, "grad_norm": 5.981709957122803, "learning_rate": 1.854569548232707e-06, "loss": 0.741, "step": 4278 }, { "epoch": 0.62, "grad_norm": 5.571542739868164, "learning_rate": 1.8544873355326521e-06, "loss": 0.7092, "step": 4279 }, { "epoch": 0.62, "grad_norm": 5.18302059173584, "learning_rate": 1.8544051014248054e-06, "loss": 0.6593, "step": 4280 }, { "epoch": 0.62, "grad_norm": 6.116055965423584, "learning_rate": 1.8543228459112274e-06, "loss": 0.7814, "step": 4281 }, { "epoch": 0.62, "grad_norm": 6.58733606338501, "learning_rate": 1.8542405689939785e-06, "loss": 0.7469, "step": 4282 }, { "epoch": 0.62, "grad_norm": 5.770270824432373, "learning_rate": 1.85415827067512e-06, "loss": 0.7284, "step": 4283 }, { "epoch": 0.62, "grad_norm": 5.726893424987793, "learning_rate": 1.854075950956714e-06, "loss": 0.7239, "step": 4284 }, { "epoch": 0.62, "grad_norm": 5.478294372558594, "learning_rate": 1.8539936098408227e-06, "loss": 0.6852, "step": 4285 }, { "epoch": 0.62, "grad_norm": 5.675847053527832, "learning_rate": 1.8539112473295091e-06, "loss": 0.7028, "step": 4286 }, { "epoch": 0.62, "grad_norm": 6.146766662597656, "learning_rate": 1.8538288634248367e-06, "loss": 0.7572, "step": 4287 }, { "epoch": 0.62, "grad_norm": 5.6176958084106445, "learning_rate": 1.8537464581288696e-06, "loss": 0.7053, "step": 4288 }, { "epoch": 0.62, "grad_norm": 6.186370849609375, "learning_rate": 1.853664031443672e-06, "loss": 0.7218, "step": 4289 }, { "epoch": 0.62, "grad_norm": 5.7128777503967285, "learning_rate": 1.853581583371309e-06, "loss": 0.7872, "step": 4290 }, { "epoch": 0.62, "grad_norm": 7.003813743591309, "learning_rate": 1.8534991139138467e-06, "loss": 0.6352, "step": 4291 }, { "epoch": 0.62, "grad_norm": 5.25942325592041, "learning_rate": 1.8534166230733507e-06, "loss": 0.646, "step": 4292 }, { "epoch": 0.62, "grad_norm": 5.4358649253845215, "learning_rate": 1.8533341108518881e-06, "loss": 0.6774, "step": 4293 }, { "epoch": 0.62, "grad_norm": 5.193737983703613, "learning_rate": 1.8532515772515255e-06, "loss": 0.691, "step": 4294 }, { "epoch": 0.62, "grad_norm": 5.647389888763428, "learning_rate": 1.8531690222743313e-06, "loss": 0.69, "step": 4295 }, { "epoch": 0.62, "grad_norm": 6.033236503601074, "learning_rate": 1.8530864459223733e-06, "loss": 0.7214, "step": 4296 }, { "epoch": 0.62, "grad_norm": 5.257839679718018, "learning_rate": 1.8530038481977206e-06, "loss": 0.6265, "step": 4297 }, { "epoch": 0.62, "grad_norm": 6.057753562927246, "learning_rate": 1.8529212291024426e-06, "loss": 0.7122, "step": 4298 }, { "epoch": 0.62, "grad_norm": 5.790197849273682, "learning_rate": 1.8528385886386091e-06, "loss": 0.7468, "step": 4299 }, { "epoch": 0.62, "grad_norm": 5.802096843719482, "learning_rate": 1.8527559268082902e-06, "loss": 0.6993, "step": 4300 }, { "epoch": 0.62, "grad_norm": 6.096851825714111, "learning_rate": 1.8526732436135573e-06, "loss": 0.7269, "step": 4301 }, { "epoch": 0.62, "grad_norm": 5.436527729034424, "learning_rate": 1.8525905390564818e-06, "loss": 0.7409, "step": 4302 }, { "epoch": 0.62, "grad_norm": 6.120482444763184, "learning_rate": 1.8525078131391354e-06, "loss": 0.7669, "step": 4303 }, { "epoch": 0.62, "grad_norm": 5.608564853668213, "learning_rate": 1.8524250658635912e-06, "loss": 0.6508, "step": 4304 }, { "epoch": 0.62, "grad_norm": 6.133932113647461, "learning_rate": 1.852342297231922e-06, "loss": 0.7592, "step": 4305 }, { "epoch": 0.62, "grad_norm": 5.60145378112793, "learning_rate": 1.8522595072462011e-06, "loss": 0.673, "step": 4306 }, { "epoch": 0.62, "grad_norm": 5.2959699630737305, "learning_rate": 1.8521766959085033e-06, "loss": 0.7119, "step": 4307 }, { "epoch": 0.63, "grad_norm": 6.266400337219238, "learning_rate": 1.8520938632209031e-06, "loss": 0.7303, "step": 4308 }, { "epoch": 0.63, "grad_norm": 5.164222240447998, "learning_rate": 1.8520110091854755e-06, "loss": 0.7065, "step": 4309 }, { "epoch": 0.63, "grad_norm": 7.62449836730957, "learning_rate": 1.8519281338042964e-06, "loss": 0.8027, "step": 4310 }, { "epoch": 0.63, "grad_norm": 6.116365432739258, "learning_rate": 1.8518452370794423e-06, "loss": 0.7429, "step": 4311 }, { "epoch": 0.63, "grad_norm": 5.5160322189331055, "learning_rate": 1.8517623190129899e-06, "loss": 0.7052, "step": 4312 }, { "epoch": 0.63, "grad_norm": 5.5025458335876465, "learning_rate": 1.8516793796070165e-06, "loss": 0.7639, "step": 4313 }, { "epoch": 0.63, "grad_norm": 5.964108467102051, "learning_rate": 1.8515964188636e-06, "loss": 0.6535, "step": 4314 }, { "epoch": 0.63, "grad_norm": 5.721992492675781, "learning_rate": 1.851513436784819e-06, "loss": 0.7025, "step": 4315 }, { "epoch": 0.63, "grad_norm": 6.589683532714844, "learning_rate": 1.8514304333727525e-06, "loss": 0.7636, "step": 4316 }, { "epoch": 0.63, "grad_norm": 6.170744895935059, "learning_rate": 1.8513474086294797e-06, "loss": 0.7149, "step": 4317 }, { "epoch": 0.63, "grad_norm": 5.633803367614746, "learning_rate": 1.8512643625570811e-06, "loss": 0.7337, "step": 4318 }, { "epoch": 0.63, "grad_norm": 5.05513858795166, "learning_rate": 1.8511812951576368e-06, "loss": 0.6474, "step": 4319 }, { "epoch": 0.63, "grad_norm": 5.867913246154785, "learning_rate": 1.8510982064332286e-06, "loss": 0.7333, "step": 4320 }, { "epoch": 0.63, "grad_norm": 5.817615985870361, "learning_rate": 1.8510150963859373e-06, "loss": 0.7436, "step": 4321 }, { "epoch": 0.63, "grad_norm": 5.082624435424805, "learning_rate": 1.8509319650178457e-06, "loss": 0.6665, "step": 4322 }, { "epoch": 0.63, "grad_norm": 5.500392436981201, "learning_rate": 1.850848812331036e-06, "loss": 0.7162, "step": 4323 }, { "epoch": 0.63, "grad_norm": 5.845370292663574, "learning_rate": 1.8507656383275925e-06, "loss": 0.6929, "step": 4324 }, { "epoch": 0.63, "grad_norm": 5.818332195281982, "learning_rate": 1.8506824430095978e-06, "loss": 0.8191, "step": 4325 }, { "epoch": 0.63, "grad_norm": 7.319582939147949, "learning_rate": 1.8505992263791367e-06, "loss": 0.8698, "step": 4326 }, { "epoch": 0.63, "grad_norm": 5.241942405700684, "learning_rate": 1.8505159884382941e-06, "loss": 0.6825, "step": 4327 }, { "epoch": 0.63, "grad_norm": 5.388897895812988, "learning_rate": 1.8504327291891553e-06, "loss": 0.6509, "step": 4328 }, { "epoch": 0.63, "grad_norm": 6.6159467697143555, "learning_rate": 1.8503494486338065e-06, "loss": 0.6505, "step": 4329 }, { "epoch": 0.63, "grad_norm": 5.868101596832275, "learning_rate": 1.850266146774334e-06, "loss": 0.7844, "step": 4330 }, { "epoch": 0.63, "grad_norm": 5.878164291381836, "learning_rate": 1.8501828236128242e-06, "loss": 0.7947, "step": 4331 }, { "epoch": 0.63, "grad_norm": 5.8475728034973145, "learning_rate": 1.8500994791513658e-06, "loss": 0.6914, "step": 4332 }, { "epoch": 0.63, "grad_norm": 5.343406677246094, "learning_rate": 1.8500161133920458e-06, "loss": 0.7396, "step": 4333 }, { "epoch": 0.63, "grad_norm": 5.22045373916626, "learning_rate": 1.8499327263369533e-06, "loss": 0.6896, "step": 4334 }, { "epoch": 0.63, "grad_norm": 6.08798360824585, "learning_rate": 1.8498493179881774e-06, "loss": 0.6354, "step": 4335 }, { "epoch": 0.63, "grad_norm": 5.167343616485596, "learning_rate": 1.8497658883478076e-06, "loss": 0.6706, "step": 4336 }, { "epoch": 0.63, "grad_norm": 5.605491638183594, "learning_rate": 1.8496824374179343e-06, "loss": 0.7363, "step": 4337 }, { "epoch": 0.63, "grad_norm": 6.076701641082764, "learning_rate": 1.8495989652006481e-06, "loss": 0.7097, "step": 4338 }, { "epoch": 0.63, "grad_norm": 5.889017581939697, "learning_rate": 1.8495154716980402e-06, "loss": 0.6597, "step": 4339 }, { "epoch": 0.63, "grad_norm": 5.081701755523682, "learning_rate": 1.8494319569122023e-06, "loss": 0.6854, "step": 4340 }, { "epoch": 0.63, "grad_norm": 6.628385066986084, "learning_rate": 1.8493484208452272e-06, "loss": 0.6868, "step": 4341 }, { "epoch": 0.63, "grad_norm": 5.9889655113220215, "learning_rate": 1.8492648634992075e-06, "loss": 0.7113, "step": 4342 }, { "epoch": 0.63, "grad_norm": 5.280473232269287, "learning_rate": 1.8491812848762364e-06, "loss": 0.7872, "step": 4343 }, { "epoch": 0.63, "grad_norm": 5.304630279541016, "learning_rate": 1.849097684978408e-06, "loss": 0.7615, "step": 4344 }, { "epoch": 0.63, "grad_norm": 5.619846820831299, "learning_rate": 1.8490140638078167e-06, "loss": 0.7667, "step": 4345 }, { "epoch": 0.63, "grad_norm": 5.1058268547058105, "learning_rate": 1.8489304213665577e-06, "loss": 0.6926, "step": 4346 }, { "epoch": 0.63, "grad_norm": 6.033750534057617, "learning_rate": 1.8488467576567261e-06, "loss": 0.7462, "step": 4347 }, { "epoch": 0.63, "grad_norm": 5.856573581695557, "learning_rate": 1.8487630726804185e-06, "loss": 0.6752, "step": 4348 }, { "epoch": 0.63, "grad_norm": 5.997268199920654, "learning_rate": 1.848679366439731e-06, "loss": 0.6415, "step": 4349 }, { "epoch": 0.63, "grad_norm": 5.460338592529297, "learning_rate": 1.848595638936761e-06, "loss": 0.662, "step": 4350 }, { "epoch": 0.63, "grad_norm": 6.290072441101074, "learning_rate": 1.848511890173606e-06, "loss": 0.6656, "step": 4351 }, { "epoch": 0.63, "grad_norm": 5.457590579986572, "learning_rate": 1.8484281201523646e-06, "loss": 0.7231, "step": 4352 }, { "epoch": 0.63, "grad_norm": 5.520097255706787, "learning_rate": 1.848344328875135e-06, "loss": 0.7314, "step": 4353 }, { "epoch": 0.63, "grad_norm": 6.74111795425415, "learning_rate": 1.8482605163440166e-06, "loss": 0.8352, "step": 4354 }, { "epoch": 0.63, "grad_norm": 4.924768447875977, "learning_rate": 1.8481766825611094e-06, "loss": 0.7125, "step": 4355 }, { "epoch": 0.63, "grad_norm": 5.790396213531494, "learning_rate": 1.8480928275285133e-06, "loss": 0.679, "step": 4356 }, { "epoch": 0.63, "grad_norm": 5.476426124572754, "learning_rate": 1.8480089512483294e-06, "loss": 0.7588, "step": 4357 }, { "epoch": 0.63, "grad_norm": 5.954228401184082, "learning_rate": 1.8479250537226593e-06, "loss": 0.7652, "step": 4358 }, { "epoch": 0.63, "grad_norm": 5.921971321105957, "learning_rate": 1.8478411349536048e-06, "loss": 0.7261, "step": 4359 }, { "epoch": 0.63, "grad_norm": 5.757192134857178, "learning_rate": 1.8477571949432682e-06, "loss": 0.7292, "step": 4360 }, { "epoch": 0.63, "grad_norm": 5.487383842468262, "learning_rate": 1.847673233693752e-06, "loss": 0.7338, "step": 4361 }, { "epoch": 0.63, "grad_norm": 5.414974212646484, "learning_rate": 1.847589251207161e-06, "loss": 0.679, "step": 4362 }, { "epoch": 0.63, "grad_norm": 5.552079677581787, "learning_rate": 1.847505247485598e-06, "loss": 0.7311, "step": 4363 }, { "epoch": 0.63, "grad_norm": 6.32920503616333, "learning_rate": 1.8474212225311684e-06, "loss": 0.7036, "step": 4364 }, { "epoch": 0.63, "grad_norm": 5.84152364730835, "learning_rate": 1.8473371763459768e-06, "loss": 0.71, "step": 4365 }, { "epoch": 0.63, "grad_norm": 5.97367000579834, "learning_rate": 1.8472531089321293e-06, "loss": 0.7156, "step": 4366 }, { "epoch": 0.63, "grad_norm": 5.789305686950684, "learning_rate": 1.8471690202917316e-06, "loss": 0.7296, "step": 4367 }, { "epoch": 0.63, "grad_norm": 5.539907455444336, "learning_rate": 1.8470849104268906e-06, "loss": 0.6917, "step": 4368 }, { "epoch": 0.63, "grad_norm": 6.090011119842529, "learning_rate": 1.8470007793397136e-06, "loss": 0.6295, "step": 4369 }, { "epoch": 0.63, "grad_norm": 5.789418697357178, "learning_rate": 1.8469166270323083e-06, "loss": 0.7355, "step": 4370 }, { "epoch": 0.63, "grad_norm": 5.500967502593994, "learning_rate": 1.8468324535067827e-06, "loss": 0.7882, "step": 4371 }, { "epoch": 0.63, "grad_norm": 5.524696350097656, "learning_rate": 1.8467482587652464e-06, "loss": 0.7028, "step": 4372 }, { "epoch": 0.63, "grad_norm": 5.350771427154541, "learning_rate": 1.8466640428098082e-06, "loss": 0.7102, "step": 4373 }, { "epoch": 0.63, "grad_norm": 5.276350975036621, "learning_rate": 1.8465798056425776e-06, "loss": 0.7647, "step": 4374 }, { "epoch": 0.63, "grad_norm": 6.068325519561768, "learning_rate": 1.846495547265666e-06, "loss": 0.6739, "step": 4375 }, { "epoch": 0.63, "grad_norm": 6.203246593475342, "learning_rate": 1.8464112676811838e-06, "loss": 0.7454, "step": 4376 }, { "epoch": 0.64, "grad_norm": 5.827967643737793, "learning_rate": 1.8463269668912426e-06, "loss": 0.6839, "step": 4377 }, { "epoch": 0.64, "grad_norm": 5.794304370880127, "learning_rate": 1.8462426448979541e-06, "loss": 0.6838, "step": 4378 }, { "epoch": 0.64, "grad_norm": 5.276254177093506, "learning_rate": 1.8461583017034315e-06, "loss": 0.6937, "step": 4379 }, { "epoch": 0.64, "grad_norm": 5.959649562835693, "learning_rate": 1.8460739373097873e-06, "loss": 0.8487, "step": 4380 }, { "epoch": 0.64, "grad_norm": 5.357737064361572, "learning_rate": 1.8459895517191355e-06, "loss": 0.7086, "step": 4381 }, { "epoch": 0.64, "grad_norm": 6.04617166519165, "learning_rate": 1.8459051449335898e-06, "loss": 0.7891, "step": 4382 }, { "epoch": 0.64, "grad_norm": 6.000442981719971, "learning_rate": 1.8458207169552653e-06, "loss": 0.7074, "step": 4383 }, { "epoch": 0.64, "grad_norm": 5.686867713928223, "learning_rate": 1.845736267786277e-06, "loss": 0.6551, "step": 4384 }, { "epoch": 0.64, "grad_norm": 5.212051868438721, "learning_rate": 1.8456517974287409e-06, "loss": 0.7102, "step": 4385 }, { "epoch": 0.64, "grad_norm": 6.310784339904785, "learning_rate": 1.845567305884773e-06, "loss": 0.7329, "step": 4386 }, { "epoch": 0.64, "grad_norm": 5.993363380432129, "learning_rate": 1.8454827931564899e-06, "loss": 0.651, "step": 4387 }, { "epoch": 0.64, "grad_norm": 5.224669456481934, "learning_rate": 1.8453982592460094e-06, "loss": 0.7104, "step": 4388 }, { "epoch": 0.64, "grad_norm": 5.889627456665039, "learning_rate": 1.845313704155449e-06, "loss": 0.6999, "step": 4389 }, { "epoch": 0.64, "grad_norm": 5.456923484802246, "learning_rate": 1.8452291278869272e-06, "loss": 0.7311, "step": 4390 }, { "epoch": 0.64, "grad_norm": 6.29994010925293, "learning_rate": 1.845144530442563e-06, "loss": 0.7519, "step": 4391 }, { "epoch": 0.64, "grad_norm": 6.304422378540039, "learning_rate": 1.845059911824476e-06, "loss": 0.6444, "step": 4392 }, { "epoch": 0.64, "grad_norm": 5.402986526489258, "learning_rate": 1.8449752720347857e-06, "loss": 0.7611, "step": 4393 }, { "epoch": 0.64, "grad_norm": 5.5362548828125, "learning_rate": 1.844890611075613e-06, "loss": 0.6409, "step": 4394 }, { "epoch": 0.64, "grad_norm": 6.122921943664551, "learning_rate": 1.844805928949079e-06, "loss": 0.6735, "step": 4395 }, { "epoch": 0.64, "grad_norm": 6.034157752990723, "learning_rate": 1.844721225657305e-06, "loss": 0.7201, "step": 4396 }, { "epoch": 0.64, "grad_norm": 6.035665512084961, "learning_rate": 1.8446365012024132e-06, "loss": 0.6906, "step": 4397 }, { "epoch": 0.64, "grad_norm": 6.426695346832275, "learning_rate": 1.8445517555865262e-06, "loss": 0.6902, "step": 4398 }, { "epoch": 0.64, "grad_norm": 5.702657222747803, "learning_rate": 1.8444669888117671e-06, "loss": 0.7614, "step": 4399 }, { "epoch": 0.64, "grad_norm": 7.065474987030029, "learning_rate": 1.84438220088026e-06, "loss": 0.671, "step": 4400 }, { "epoch": 0.64, "grad_norm": 5.376439571380615, "learning_rate": 1.8442973917941288e-06, "loss": 0.6655, "step": 4401 }, { "epoch": 0.64, "grad_norm": 5.877199649810791, "learning_rate": 1.8442125615554982e-06, "loss": 0.7825, "step": 4402 }, { "epoch": 0.64, "grad_norm": 10.764351844787598, "learning_rate": 1.8441277101664938e-06, "loss": 0.8077, "step": 4403 }, { "epoch": 0.64, "grad_norm": 5.09312105178833, "learning_rate": 1.844042837629241e-06, "loss": 0.7159, "step": 4404 }, { "epoch": 0.64, "grad_norm": 4.9248948097229, "learning_rate": 1.843957943945866e-06, "loss": 0.6368, "step": 4405 }, { "epoch": 0.64, "grad_norm": 5.774326801300049, "learning_rate": 1.8438730291184962e-06, "loss": 0.6874, "step": 4406 }, { "epoch": 0.64, "grad_norm": 6.050229072570801, "learning_rate": 1.8437880931492594e-06, "loss": 0.7456, "step": 4407 }, { "epoch": 0.64, "grad_norm": 5.871298313140869, "learning_rate": 1.843703136040282e-06, "loss": 0.7174, "step": 4408 }, { "epoch": 0.64, "grad_norm": 5.543554782867432, "learning_rate": 1.8436181577936941e-06, "loss": 0.64, "step": 4409 }, { "epoch": 0.64, "grad_norm": 5.292832851409912, "learning_rate": 1.8435331584116235e-06, "loss": 0.7664, "step": 4410 }, { "epoch": 0.64, "grad_norm": 5.290171146392822, "learning_rate": 1.8434481378962005e-06, "loss": 0.694, "step": 4411 }, { "epoch": 0.64, "grad_norm": 6.115489959716797, "learning_rate": 1.843363096249555e-06, "loss": 0.714, "step": 4412 }, { "epoch": 0.64, "grad_norm": 5.271993637084961, "learning_rate": 1.8432780334738171e-06, "loss": 0.7257, "step": 4413 }, { "epoch": 0.64, "grad_norm": 5.869672775268555, "learning_rate": 1.8431929495711185e-06, "loss": 0.7316, "step": 4414 }, { "epoch": 0.64, "grad_norm": 5.332033634185791, "learning_rate": 1.8431078445435904e-06, "loss": 0.6796, "step": 4415 }, { "epoch": 0.64, "grad_norm": 6.135114669799805, "learning_rate": 1.8430227183933655e-06, "loss": 0.7923, "step": 4416 }, { "epoch": 0.64, "grad_norm": 5.559622287750244, "learning_rate": 1.8429375711225758e-06, "loss": 0.7441, "step": 4417 }, { "epoch": 0.64, "grad_norm": 5.9071173667907715, "learning_rate": 1.842852402733355e-06, "loss": 0.7182, "step": 4418 }, { "epoch": 0.64, "grad_norm": 5.113497734069824, "learning_rate": 1.842767213227837e-06, "loss": 0.6682, "step": 4419 }, { "epoch": 0.64, "grad_norm": 6.40803337097168, "learning_rate": 1.8426820026081554e-06, "loss": 0.6655, "step": 4420 }, { "epoch": 0.64, "grad_norm": 5.5277557373046875, "learning_rate": 1.8425967708764456e-06, "loss": 0.7269, "step": 4421 }, { "epoch": 0.64, "grad_norm": 5.499736309051514, "learning_rate": 1.8425115180348427e-06, "loss": 0.6976, "step": 4422 }, { "epoch": 0.64, "grad_norm": 5.805480480194092, "learning_rate": 1.842426244085483e-06, "loss": 0.7708, "step": 4423 }, { "epoch": 0.64, "grad_norm": 5.449337005615234, "learning_rate": 1.8423409490305022e-06, "loss": 0.648, "step": 4424 }, { "epoch": 0.64, "grad_norm": 5.847498893737793, "learning_rate": 1.8422556328720375e-06, "loss": 0.7045, "step": 4425 }, { "epoch": 0.64, "grad_norm": 5.969493865966797, "learning_rate": 1.842170295612227e-06, "loss": 0.8432, "step": 4426 }, { "epoch": 0.64, "grad_norm": 5.843381881713867, "learning_rate": 1.8420849372532074e-06, "loss": 0.7426, "step": 4427 }, { "epoch": 0.64, "grad_norm": 6.281838893890381, "learning_rate": 1.8419995577971184e-06, "loss": 0.8101, "step": 4428 }, { "epoch": 0.64, "grad_norm": 5.35172176361084, "learning_rate": 1.8419141572460983e-06, "loss": 0.7094, "step": 4429 }, { "epoch": 0.64, "grad_norm": 5.6464433670043945, "learning_rate": 1.8418287356022875e-06, "loss": 0.6514, "step": 4430 }, { "epoch": 0.64, "grad_norm": 6.190603733062744, "learning_rate": 1.841743292867825e-06, "loss": 0.9511, "step": 4431 }, { "epoch": 0.64, "grad_norm": 6.338459491729736, "learning_rate": 1.8416578290448524e-06, "loss": 0.8086, "step": 4432 }, { "epoch": 0.64, "grad_norm": 5.93052864074707, "learning_rate": 1.84157234413551e-06, "loss": 0.7513, "step": 4433 }, { "epoch": 0.64, "grad_norm": 5.761868953704834, "learning_rate": 1.8414868381419402e-06, "loss": 0.6547, "step": 4434 }, { "epoch": 0.64, "grad_norm": 7.781351566314697, "learning_rate": 1.8414013110662848e-06, "loss": 0.7687, "step": 4435 }, { "epoch": 0.64, "grad_norm": 6.224668502807617, "learning_rate": 1.841315762910687e-06, "loss": 0.6933, "step": 4436 }, { "epoch": 0.64, "grad_norm": 6.1888604164123535, "learning_rate": 1.8412301936772894e-06, "loss": 0.7242, "step": 4437 }, { "epoch": 0.64, "grad_norm": 5.865229606628418, "learning_rate": 1.8411446033682363e-06, "loss": 0.7636, "step": 4438 }, { "epoch": 0.64, "grad_norm": 5.39006233215332, "learning_rate": 1.8410589919856717e-06, "loss": 0.6395, "step": 4439 }, { "epoch": 0.64, "grad_norm": 5.81508731842041, "learning_rate": 1.8409733595317409e-06, "loss": 0.7309, "step": 4440 }, { "epoch": 0.64, "grad_norm": 5.2058491706848145, "learning_rate": 1.8408877060085888e-06, "loss": 0.7149, "step": 4441 }, { "epoch": 0.64, "grad_norm": 5.961480617523193, "learning_rate": 1.8408020314183617e-06, "loss": 0.7354, "step": 4442 }, { "epoch": 0.64, "grad_norm": 5.499805927276611, "learning_rate": 1.8407163357632058e-06, "loss": 0.6652, "step": 4443 }, { "epoch": 0.64, "grad_norm": 5.706496715545654, "learning_rate": 1.8406306190452682e-06, "loss": 0.7555, "step": 4444 }, { "epoch": 0.64, "grad_norm": 5.589308738708496, "learning_rate": 1.8405448812666964e-06, "loss": 0.6642, "step": 4445 }, { "epoch": 0.65, "grad_norm": 5.76857328414917, "learning_rate": 1.840459122429638e-06, "loss": 0.6943, "step": 4446 }, { "epoch": 0.65, "grad_norm": 5.906928539276123, "learning_rate": 1.840373342536242e-06, "loss": 0.6923, "step": 4447 }, { "epoch": 0.65, "grad_norm": 6.620219707489014, "learning_rate": 1.8402875415886576e-06, "loss": 0.722, "step": 4448 }, { "epoch": 0.65, "grad_norm": 5.922585487365723, "learning_rate": 1.840201719589034e-06, "loss": 0.6877, "step": 4449 }, { "epoch": 0.65, "grad_norm": 5.594963073730469, "learning_rate": 1.8401158765395214e-06, "loss": 0.6833, "step": 4450 }, { "epoch": 0.65, "grad_norm": 5.727317810058594, "learning_rate": 1.8400300124422708e-06, "loss": 0.7221, "step": 4451 }, { "epoch": 0.65, "grad_norm": 5.6837592124938965, "learning_rate": 1.839944127299433e-06, "loss": 0.6745, "step": 4452 }, { "epoch": 0.65, "grad_norm": 5.638040542602539, "learning_rate": 1.8398582211131597e-06, "loss": 0.738, "step": 4453 }, { "epoch": 0.65, "grad_norm": 5.338879585266113, "learning_rate": 1.8397722938856038e-06, "loss": 0.7292, "step": 4454 }, { "epoch": 0.65, "grad_norm": 5.846175193786621, "learning_rate": 1.839686345618917e-06, "loss": 0.8035, "step": 4455 }, { "epoch": 0.65, "grad_norm": 5.353514671325684, "learning_rate": 1.8396003763152534e-06, "loss": 0.7176, "step": 4456 }, { "epoch": 0.65, "grad_norm": 6.215798854827881, "learning_rate": 1.8395143859767665e-06, "loss": 0.7485, "step": 4457 }, { "epoch": 0.65, "grad_norm": 5.819431781768799, "learning_rate": 1.839428374605611e-06, "loss": 0.835, "step": 4458 }, { "epoch": 0.65, "grad_norm": 6.2414631843566895, "learning_rate": 1.8393423422039414e-06, "loss": 0.6998, "step": 4459 }, { "epoch": 0.65, "grad_norm": 5.833624839782715, "learning_rate": 1.839256288773913e-06, "loss": 0.6565, "step": 4460 }, { "epoch": 0.65, "grad_norm": 5.791172504425049, "learning_rate": 1.8391702143176819e-06, "loss": 0.6401, "step": 4461 }, { "epoch": 0.65, "grad_norm": 6.022670745849609, "learning_rate": 1.8390841188374048e-06, "loss": 0.6708, "step": 4462 }, { "epoch": 0.65, "grad_norm": 5.545808792114258, "learning_rate": 1.8389980023352383e-06, "loss": 0.6834, "step": 4463 }, { "epoch": 0.65, "grad_norm": 5.907962322235107, "learning_rate": 1.8389118648133404e-06, "loss": 0.6574, "step": 4464 }, { "epoch": 0.65, "grad_norm": 6.371851444244385, "learning_rate": 1.8388257062738686e-06, "loss": 0.6684, "step": 4465 }, { "epoch": 0.65, "grad_norm": 5.249838829040527, "learning_rate": 1.8387395267189816e-06, "loss": 0.698, "step": 4466 }, { "epoch": 0.65, "grad_norm": 6.040472984313965, "learning_rate": 1.8386533261508385e-06, "loss": 0.7343, "step": 4467 }, { "epoch": 0.65, "grad_norm": 6.258182048797607, "learning_rate": 1.8385671045715992e-06, "loss": 0.8632, "step": 4468 }, { "epoch": 0.65, "grad_norm": 5.714554309844971, "learning_rate": 1.8384808619834232e-06, "loss": 0.7234, "step": 4469 }, { "epoch": 0.65, "grad_norm": 5.422533988952637, "learning_rate": 1.8383945983884721e-06, "loss": 0.6809, "step": 4470 }, { "epoch": 0.65, "grad_norm": 5.893794536590576, "learning_rate": 1.8383083137889064e-06, "loss": 0.775, "step": 4471 }, { "epoch": 0.65, "grad_norm": 5.946055889129639, "learning_rate": 1.8382220081868881e-06, "loss": 0.7158, "step": 4472 }, { "epoch": 0.65, "grad_norm": 5.507126808166504, "learning_rate": 1.8381356815845791e-06, "loss": 0.7103, "step": 4473 }, { "epoch": 0.65, "grad_norm": 5.3719801902771, "learning_rate": 1.8380493339841427e-06, "loss": 0.666, "step": 4474 }, { "epoch": 0.65, "grad_norm": 5.886734962463379, "learning_rate": 1.837962965387742e-06, "loss": 0.6388, "step": 4475 }, { "epoch": 0.65, "grad_norm": 5.839935302734375, "learning_rate": 1.8378765757975404e-06, "loss": 0.7143, "step": 4476 }, { "epoch": 0.65, "grad_norm": 5.707546710968018, "learning_rate": 1.8377901652157029e-06, "loss": 0.7203, "step": 4477 }, { "epoch": 0.65, "grad_norm": 5.375580310821533, "learning_rate": 1.837703733644394e-06, "loss": 0.666, "step": 4478 }, { "epoch": 0.65, "grad_norm": 5.869944095611572, "learning_rate": 1.8376172810857791e-06, "loss": 0.7727, "step": 4479 }, { "epoch": 0.65, "grad_norm": 5.533034324645996, "learning_rate": 1.8375308075420243e-06, "loss": 0.6729, "step": 4480 }, { "epoch": 0.65, "grad_norm": 5.824966907501221, "learning_rate": 1.8374443130152958e-06, "loss": 0.7561, "step": 4481 }, { "epoch": 0.65, "grad_norm": 7.309196472167969, "learning_rate": 1.837357797507761e-06, "loss": 0.7295, "step": 4482 }, { "epoch": 0.65, "grad_norm": 5.400826930999756, "learning_rate": 1.8372712610215869e-06, "loss": 0.7633, "step": 4483 }, { "epoch": 0.65, "grad_norm": 5.6189866065979, "learning_rate": 1.8371847035589419e-06, "loss": 0.6684, "step": 4484 }, { "epoch": 0.65, "grad_norm": 5.400534152984619, "learning_rate": 1.8370981251219942e-06, "loss": 0.6758, "step": 4485 }, { "epoch": 0.65, "grad_norm": 5.616333961486816, "learning_rate": 1.8370115257129135e-06, "loss": 0.7267, "step": 4486 }, { "epoch": 0.65, "grad_norm": 5.7409162521362305, "learning_rate": 1.8369249053338686e-06, "loss": 0.7032, "step": 4487 }, { "epoch": 0.65, "grad_norm": 5.95754337310791, "learning_rate": 1.8368382639870305e-06, "loss": 0.7502, "step": 4488 }, { "epoch": 0.65, "grad_norm": 5.6876349449157715, "learning_rate": 1.8367516016745692e-06, "loss": 0.6597, "step": 4489 }, { "epoch": 0.65, "grad_norm": 5.926693439483643, "learning_rate": 1.836664918398656e-06, "loss": 0.7439, "step": 4490 }, { "epoch": 0.65, "grad_norm": 6.301686763763428, "learning_rate": 1.8365782141614628e-06, "loss": 0.7856, "step": 4491 }, { "epoch": 0.65, "grad_norm": 7.036918640136719, "learning_rate": 1.8364914889651618e-06, "loss": 0.7507, "step": 4492 }, { "epoch": 0.65, "grad_norm": 5.192286968231201, "learning_rate": 1.8364047428119255e-06, "loss": 0.6363, "step": 4493 }, { "epoch": 0.65, "grad_norm": 5.851945400238037, "learning_rate": 1.8363179757039276e-06, "loss": 0.7616, "step": 4494 }, { "epoch": 0.65, "grad_norm": 7.233661651611328, "learning_rate": 1.836231187643342e-06, "loss": 0.734, "step": 4495 }, { "epoch": 0.65, "grad_norm": 5.73785924911499, "learning_rate": 1.836144378632342e-06, "loss": 0.7607, "step": 4496 }, { "epoch": 0.65, "grad_norm": 5.253087997436523, "learning_rate": 1.8360575486731037e-06, "loss": 0.7577, "step": 4497 }, { "epoch": 0.65, "grad_norm": 6.070878028869629, "learning_rate": 1.8359706977678017e-06, "loss": 0.7874, "step": 4498 }, { "epoch": 0.65, "grad_norm": 5.524001598358154, "learning_rate": 1.8358838259186123e-06, "loss": 0.743, "step": 4499 }, { "epoch": 0.65, "grad_norm": 5.161069393157959, "learning_rate": 1.8357969331277119e-06, "loss": 0.7579, "step": 4500 }, { "epoch": 0.65, "grad_norm": 5.944278240203857, "learning_rate": 1.8357100193972772e-06, "loss": 0.7776, "step": 4501 }, { "epoch": 0.65, "grad_norm": 5.337409973144531, "learning_rate": 1.835623084729486e-06, "loss": 0.7442, "step": 4502 }, { "epoch": 0.65, "grad_norm": 5.4104766845703125, "learning_rate": 1.835536129126516e-06, "loss": 0.6481, "step": 4503 }, { "epoch": 0.65, "grad_norm": 5.3799729347229, "learning_rate": 1.835449152590546e-06, "loss": 0.7383, "step": 4504 }, { "epoch": 0.65, "grad_norm": 5.377315521240234, "learning_rate": 1.8353621551237548e-06, "loss": 0.705, "step": 4505 }, { "epoch": 0.65, "grad_norm": 5.7048420906066895, "learning_rate": 1.8352751367283222e-06, "loss": 0.6921, "step": 4506 }, { "epoch": 0.65, "grad_norm": 5.4720234870910645, "learning_rate": 1.8351880974064282e-06, "loss": 0.6902, "step": 4507 }, { "epoch": 0.65, "grad_norm": 6.18597412109375, "learning_rate": 1.8351010371602538e-06, "loss": 0.799, "step": 4508 }, { "epoch": 0.65, "grad_norm": 6.218517303466797, "learning_rate": 1.8350139559919793e-06, "loss": 0.8231, "step": 4509 }, { "epoch": 0.65, "grad_norm": 5.955119609832764, "learning_rate": 1.8349268539037872e-06, "loss": 0.8454, "step": 4510 }, { "epoch": 0.65, "grad_norm": 5.719101428985596, "learning_rate": 1.834839730897859e-06, "loss": 0.7178, "step": 4511 }, { "epoch": 0.65, "grad_norm": 5.656411647796631, "learning_rate": 1.8347525869763784e-06, "loss": 0.7047, "step": 4512 }, { "epoch": 0.65, "grad_norm": 5.725639820098877, "learning_rate": 1.8346654221415277e-06, "loss": 0.6557, "step": 4513 }, { "epoch": 0.65, "grad_norm": 5.678221225738525, "learning_rate": 1.8345782363954911e-06, "loss": 0.7553, "step": 4514 }, { "epoch": 0.66, "grad_norm": 6.729249000549316, "learning_rate": 1.834491029740453e-06, "loss": 0.7647, "step": 4515 }, { "epoch": 0.66, "grad_norm": 5.5473527908325195, "learning_rate": 1.8344038021785977e-06, "loss": 0.7068, "step": 4516 }, { "epoch": 0.66, "grad_norm": 5.859917640686035, "learning_rate": 1.8343165537121112e-06, "loss": 0.7097, "step": 4517 }, { "epoch": 0.66, "grad_norm": 5.816864490509033, "learning_rate": 1.834229284343179e-06, "loss": 0.6653, "step": 4518 }, { "epoch": 0.66, "grad_norm": 5.462427139282227, "learning_rate": 1.8341419940739875e-06, "loss": 0.6844, "step": 4519 }, { "epoch": 0.66, "grad_norm": 5.828326225280762, "learning_rate": 1.8340546829067237e-06, "loss": 0.795, "step": 4520 }, { "epoch": 0.66, "grad_norm": 6.053552150726318, "learning_rate": 1.8339673508435752e-06, "loss": 0.7713, "step": 4521 }, { "epoch": 0.66, "grad_norm": 5.677738666534424, "learning_rate": 1.8338799978867295e-06, "loss": 0.7624, "step": 4522 }, { "epoch": 0.66, "grad_norm": 6.1385178565979, "learning_rate": 1.8337926240383755e-06, "loss": 0.808, "step": 4523 }, { "epoch": 0.66, "grad_norm": 6.257558822631836, "learning_rate": 1.8337052293007021e-06, "loss": 0.5965, "step": 4524 }, { "epoch": 0.66, "grad_norm": 6.615542888641357, "learning_rate": 1.8336178136758987e-06, "loss": 0.7766, "step": 4525 }, { "epoch": 0.66, "grad_norm": 5.3645405769348145, "learning_rate": 1.8335303771661555e-06, "loss": 0.6812, "step": 4526 }, { "epoch": 0.66, "grad_norm": 5.0830559730529785, "learning_rate": 1.8334429197736632e-06, "loss": 0.7232, "step": 4527 }, { "epoch": 0.66, "grad_norm": 5.678694248199463, "learning_rate": 1.8333554415006125e-06, "loss": 0.7037, "step": 4528 }, { "epoch": 0.66, "grad_norm": 5.586935997009277, "learning_rate": 1.8332679423491953e-06, "loss": 0.655, "step": 4529 }, { "epoch": 0.66, "grad_norm": 5.4047064781188965, "learning_rate": 1.8331804223216038e-06, "loss": 0.6836, "step": 4530 }, { "epoch": 0.66, "grad_norm": 5.664434432983398, "learning_rate": 1.833092881420031e-06, "loss": 0.7809, "step": 4531 }, { "epoch": 0.66, "grad_norm": 5.873064994812012, "learning_rate": 1.833005319646669e-06, "loss": 0.7557, "step": 4532 }, { "epoch": 0.66, "grad_norm": 5.8366217613220215, "learning_rate": 1.8329177370037123e-06, "loss": 0.7361, "step": 4533 }, { "epoch": 0.66, "grad_norm": 5.503838062286377, "learning_rate": 1.8328301334933554e-06, "loss": 0.7025, "step": 4534 }, { "epoch": 0.66, "grad_norm": 6.159261703491211, "learning_rate": 1.8327425091177926e-06, "loss": 0.817, "step": 4535 }, { "epoch": 0.66, "grad_norm": 6.191507816314697, "learning_rate": 1.8326548638792192e-06, "loss": 0.7287, "step": 4536 }, { "epoch": 0.66, "grad_norm": 5.130524635314941, "learning_rate": 1.8325671977798312e-06, "loss": 0.7158, "step": 4537 }, { "epoch": 0.66, "grad_norm": 5.950085639953613, "learning_rate": 1.8324795108218249e-06, "loss": 0.7154, "step": 4538 }, { "epoch": 0.66, "grad_norm": 6.303215503692627, "learning_rate": 1.8323918030073968e-06, "loss": 0.7085, "step": 4539 }, { "epoch": 0.66, "grad_norm": 5.958596229553223, "learning_rate": 1.832304074338745e-06, "loss": 0.7238, "step": 4540 }, { "epoch": 0.66, "grad_norm": 6.3493781089782715, "learning_rate": 1.8322163248180665e-06, "loss": 0.7438, "step": 4541 }, { "epoch": 0.66, "grad_norm": 5.793335437774658, "learning_rate": 1.8321285544475606e-06, "loss": 0.6307, "step": 4542 }, { "epoch": 0.66, "grad_norm": 5.580075263977051, "learning_rate": 1.8320407632294254e-06, "loss": 0.7696, "step": 4543 }, { "epoch": 0.66, "grad_norm": 5.847567081451416, "learning_rate": 1.8319529511658614e-06, "loss": 0.844, "step": 4544 }, { "epoch": 0.66, "grad_norm": 5.900949478149414, "learning_rate": 1.8318651182590674e-06, "loss": 0.8044, "step": 4545 }, { "epoch": 0.66, "grad_norm": 5.354107856750488, "learning_rate": 1.8317772645112448e-06, "loss": 0.6718, "step": 4546 }, { "epoch": 0.66, "grad_norm": 5.457724094390869, "learning_rate": 1.8316893899245942e-06, "loss": 0.7321, "step": 4547 }, { "epoch": 0.66, "grad_norm": 6.354732036590576, "learning_rate": 1.8316014945013176e-06, "loss": 0.7256, "step": 4548 }, { "epoch": 0.66, "grad_norm": 5.224138259887695, "learning_rate": 1.8315135782436167e-06, "loss": 0.661, "step": 4549 }, { "epoch": 0.66, "grad_norm": 6.0611796379089355, "learning_rate": 1.8314256411536938e-06, "loss": 0.6942, "step": 4550 }, { "epoch": 0.66, "grad_norm": 5.9462666511535645, "learning_rate": 1.8313376832337527e-06, "loss": 0.7431, "step": 4551 }, { "epoch": 0.66, "grad_norm": 6.389776229858398, "learning_rate": 1.8312497044859966e-06, "loss": 0.7396, "step": 4552 }, { "epoch": 0.66, "grad_norm": 5.7092719078063965, "learning_rate": 1.8311617049126299e-06, "loss": 0.7105, "step": 4553 }, { "epoch": 0.66, "grad_norm": 5.785711288452148, "learning_rate": 1.831073684515857e-06, "loss": 0.7687, "step": 4554 }, { "epoch": 0.66, "grad_norm": 6.244706153869629, "learning_rate": 1.8309856432978833e-06, "loss": 0.6955, "step": 4555 }, { "epoch": 0.66, "grad_norm": 5.032446384429932, "learning_rate": 1.8308975812609147e-06, "loss": 0.6259, "step": 4556 }, { "epoch": 0.66, "grad_norm": 5.7162556648254395, "learning_rate": 1.8308094984071571e-06, "loss": 0.6911, "step": 4557 }, { "epoch": 0.66, "grad_norm": 6.287631034851074, "learning_rate": 1.8307213947388175e-06, "loss": 0.68, "step": 4558 }, { "epoch": 0.66, "grad_norm": 6.606784820556641, "learning_rate": 1.830633270258103e-06, "loss": 0.7896, "step": 4559 }, { "epoch": 0.66, "grad_norm": 5.49531888961792, "learning_rate": 1.8305451249672217e-06, "loss": 0.7787, "step": 4560 }, { "epoch": 0.66, "grad_norm": 5.850902080535889, "learning_rate": 1.8304569588683818e-06, "loss": 0.7382, "step": 4561 }, { "epoch": 0.66, "grad_norm": 6.171844005584717, "learning_rate": 1.8303687719637924e-06, "loss": 0.7497, "step": 4562 }, { "epoch": 0.66, "grad_norm": 5.6234588623046875, "learning_rate": 1.8302805642556622e-06, "loss": 0.7152, "step": 4563 }, { "epoch": 0.66, "grad_norm": 5.511115550994873, "learning_rate": 1.8301923357462016e-06, "loss": 0.7249, "step": 4564 }, { "epoch": 0.66, "grad_norm": 6.310948371887207, "learning_rate": 1.830104086437621e-06, "loss": 0.7474, "step": 4565 }, { "epoch": 0.66, "grad_norm": 6.22597599029541, "learning_rate": 1.8300158163321316e-06, "loss": 0.7079, "step": 4566 }, { "epoch": 0.66, "grad_norm": 6.19977331161499, "learning_rate": 1.829927525431944e-06, "loss": 0.7521, "step": 4567 }, { "epoch": 0.66, "grad_norm": 5.720122337341309, "learning_rate": 1.8298392137392712e-06, "loss": 0.8123, "step": 4568 }, { "epoch": 0.66, "grad_norm": 5.630945205688477, "learning_rate": 1.829750881256325e-06, "loss": 0.66, "step": 4569 }, { "epoch": 0.66, "grad_norm": 5.963653564453125, "learning_rate": 1.8296625279853189e-06, "loss": 0.6713, "step": 4570 }, { "epoch": 0.66, "grad_norm": 5.777524948120117, "learning_rate": 1.829574153928466e-06, "loss": 0.7346, "step": 4571 }, { "epoch": 0.66, "grad_norm": 6.015121936798096, "learning_rate": 1.8294857590879804e-06, "loss": 0.7382, "step": 4572 }, { "epoch": 0.66, "grad_norm": 5.178116321563721, "learning_rate": 1.8293973434660771e-06, "loss": 0.6597, "step": 4573 }, { "epoch": 0.66, "grad_norm": 6.066927433013916, "learning_rate": 1.8293089070649713e-06, "loss": 0.7373, "step": 4574 }, { "epoch": 0.66, "grad_norm": 6.064211368560791, "learning_rate": 1.8292204498868778e-06, "loss": 0.6815, "step": 4575 }, { "epoch": 0.66, "grad_norm": 5.534005165100098, "learning_rate": 1.8291319719340135e-06, "loss": 0.8049, "step": 4576 }, { "epoch": 0.66, "grad_norm": 6.071412563323975, "learning_rate": 1.8290434732085948e-06, "loss": 0.7552, "step": 4577 }, { "epoch": 0.66, "grad_norm": 6.322497844696045, "learning_rate": 1.8289549537128392e-06, "loss": 0.8468, "step": 4578 }, { "epoch": 0.66, "grad_norm": 5.206225872039795, "learning_rate": 1.8288664134489639e-06, "loss": 0.6639, "step": 4579 }, { "epoch": 0.66, "grad_norm": 5.96232271194458, "learning_rate": 1.8287778524191872e-06, "loss": 0.7274, "step": 4580 }, { "epoch": 0.66, "grad_norm": 6.317347526550293, "learning_rate": 1.8286892706257284e-06, "loss": 0.6971, "step": 4581 }, { "epoch": 0.66, "grad_norm": 5.36855411529541, "learning_rate": 1.8286006680708063e-06, "loss": 0.7595, "step": 4582 }, { "epoch": 0.66, "grad_norm": 5.8387227058410645, "learning_rate": 1.8285120447566406e-06, "loss": 0.7042, "step": 4583 }, { "epoch": 0.67, "grad_norm": 5.39667272567749, "learning_rate": 1.828423400685452e-06, "loss": 0.7773, "step": 4584 }, { "epoch": 0.67, "grad_norm": 5.167214393615723, "learning_rate": 1.8283347358594609e-06, "loss": 0.7059, "step": 4585 }, { "epoch": 0.67, "grad_norm": 5.909808158874512, "learning_rate": 1.8282460502808891e-06, "loss": 0.7149, "step": 4586 }, { "epoch": 0.67, "grad_norm": 5.6676716804504395, "learning_rate": 1.8281573439519584e-06, "loss": 0.6843, "step": 4587 }, { "epoch": 0.67, "grad_norm": 5.742567539215088, "learning_rate": 1.8280686168748908e-06, "loss": 0.7082, "step": 4588 }, { "epoch": 0.67, "grad_norm": 6.155275821685791, "learning_rate": 1.8279798690519094e-06, "loss": 0.6847, "step": 4589 }, { "epoch": 0.67, "grad_norm": 5.897824764251709, "learning_rate": 1.8278911004852383e-06, "loss": 0.6586, "step": 4590 }, { "epoch": 0.67, "grad_norm": 5.385296821594238, "learning_rate": 1.8278023111771e-06, "loss": 0.7151, "step": 4591 }, { "epoch": 0.67, "grad_norm": 5.236146926879883, "learning_rate": 1.8277135011297204e-06, "loss": 0.7321, "step": 4592 }, { "epoch": 0.67, "grad_norm": 5.615884304046631, "learning_rate": 1.827624670345324e-06, "loss": 0.7062, "step": 4593 }, { "epoch": 0.67, "grad_norm": 5.7069292068481445, "learning_rate": 1.827535818826136e-06, "loss": 0.6036, "step": 4594 }, { "epoch": 0.67, "grad_norm": 5.9980692863464355, "learning_rate": 1.8274469465743827e-06, "loss": 0.7502, "step": 4595 }, { "epoch": 0.67, "grad_norm": 6.763906955718994, "learning_rate": 1.8273580535922906e-06, "loss": 0.7849, "step": 4596 }, { "epoch": 0.67, "grad_norm": 5.913297653198242, "learning_rate": 1.8272691398820868e-06, "loss": 0.6879, "step": 4597 }, { "epoch": 0.67, "grad_norm": 5.945658206939697, "learning_rate": 1.8271802054459988e-06, "loss": 0.7674, "step": 4598 }, { "epoch": 0.67, "grad_norm": 5.180346488952637, "learning_rate": 1.827091250286255e-06, "loss": 0.7108, "step": 4599 }, { "epoch": 0.67, "grad_norm": 5.3412184715271, "learning_rate": 1.8270022744050838e-06, "loss": 0.6708, "step": 4600 }, { "epoch": 0.67, "grad_norm": 6.07837438583374, "learning_rate": 1.8269132778047141e-06, "loss": 0.6832, "step": 4601 }, { "epoch": 0.67, "grad_norm": 6.615143775939941, "learning_rate": 1.8268242604873757e-06, "loss": 0.8363, "step": 4602 }, { "epoch": 0.67, "grad_norm": 5.727717399597168, "learning_rate": 1.8267352224552992e-06, "loss": 0.6563, "step": 4603 }, { "epoch": 0.67, "grad_norm": 6.125892162322998, "learning_rate": 1.826646163710715e-06, "loss": 0.6713, "step": 4604 }, { "epoch": 0.67, "grad_norm": 5.526310920715332, "learning_rate": 1.8265570842558541e-06, "loss": 0.7786, "step": 4605 }, { "epoch": 0.67, "grad_norm": 5.889415264129639, "learning_rate": 1.8264679840929486e-06, "loss": 0.7579, "step": 4606 }, { "epoch": 0.67, "grad_norm": 5.2219696044921875, "learning_rate": 1.8263788632242307e-06, "loss": 0.7059, "step": 4607 }, { "epoch": 0.67, "grad_norm": 5.708352088928223, "learning_rate": 1.8262897216519332e-06, "loss": 0.7532, "step": 4608 }, { "epoch": 0.67, "grad_norm": 5.868206977844238, "learning_rate": 1.826200559378289e-06, "loss": 0.6797, "step": 4609 }, { "epoch": 0.67, "grad_norm": 5.698866367340088, "learning_rate": 1.8261113764055324e-06, "loss": 0.7641, "step": 4610 }, { "epoch": 0.67, "grad_norm": 6.054651737213135, "learning_rate": 1.8260221727358975e-06, "loss": 0.7636, "step": 4611 }, { "epoch": 0.67, "grad_norm": 6.777349472045898, "learning_rate": 1.8259329483716192e-06, "loss": 0.6666, "step": 4612 }, { "epoch": 0.67, "grad_norm": 5.670973300933838, "learning_rate": 1.825843703314933e-06, "loss": 0.7636, "step": 4613 }, { "epoch": 0.67, "grad_norm": 4.9123125076293945, "learning_rate": 1.8257544375680745e-06, "loss": 0.7174, "step": 4614 }, { "epoch": 0.67, "grad_norm": 5.509328842163086, "learning_rate": 1.8256651511332803e-06, "loss": 0.7284, "step": 4615 }, { "epoch": 0.67, "grad_norm": 5.322074890136719, "learning_rate": 1.8255758440127874e-06, "loss": 0.7681, "step": 4616 }, { "epoch": 0.67, "grad_norm": 5.7364583015441895, "learning_rate": 1.8254865162088332e-06, "loss": 0.7191, "step": 4617 }, { "epoch": 0.67, "grad_norm": 5.641439914703369, "learning_rate": 1.8253971677236555e-06, "loss": 0.7072, "step": 4618 }, { "epoch": 0.67, "grad_norm": 5.870400905609131, "learning_rate": 1.8253077985594928e-06, "loss": 0.7617, "step": 4619 }, { "epoch": 0.67, "grad_norm": 6.6955108642578125, "learning_rate": 1.8252184087185844e-06, "loss": 0.7678, "step": 4620 }, { "epoch": 0.67, "grad_norm": 5.275647163391113, "learning_rate": 1.8251289982031696e-06, "loss": 0.6444, "step": 4621 }, { "epoch": 0.67, "grad_norm": 5.673607349395752, "learning_rate": 1.8250395670154884e-06, "loss": 0.7762, "step": 4622 }, { "epoch": 0.67, "grad_norm": 5.3947601318359375, "learning_rate": 1.8249501151577814e-06, "loss": 0.6819, "step": 4623 }, { "epoch": 0.67, "grad_norm": 5.817721843719482, "learning_rate": 1.8248606426322896e-06, "loss": 0.7554, "step": 4624 }, { "epoch": 0.67, "grad_norm": 5.497783184051514, "learning_rate": 1.8247711494412545e-06, "loss": 0.6746, "step": 4625 }, { "epoch": 0.67, "grad_norm": 5.255532741546631, "learning_rate": 1.8246816355869185e-06, "loss": 0.6758, "step": 4626 }, { "epoch": 0.67, "grad_norm": 5.22171688079834, "learning_rate": 1.8245921010715242e-06, "loss": 0.6853, "step": 4627 }, { "epoch": 0.67, "grad_norm": 5.851222991943359, "learning_rate": 1.8245025458973146e-06, "loss": 0.7498, "step": 4628 }, { "epoch": 0.67, "grad_norm": 5.557663917541504, "learning_rate": 1.8244129700665335e-06, "loss": 0.7134, "step": 4629 }, { "epoch": 0.67, "grad_norm": 5.944317817687988, "learning_rate": 1.8243233735814246e-06, "loss": 0.7114, "step": 4630 }, { "epoch": 0.67, "grad_norm": 5.885355472564697, "learning_rate": 1.824233756444233e-06, "loss": 0.7471, "step": 4631 }, { "epoch": 0.67, "grad_norm": 5.248122215270996, "learning_rate": 1.8241441186572044e-06, "loss": 0.7182, "step": 4632 }, { "epoch": 0.67, "grad_norm": 5.845627307891846, "learning_rate": 1.8240544602225835e-06, "loss": 0.7791, "step": 4633 }, { "epoch": 0.67, "grad_norm": 5.99174690246582, "learning_rate": 1.8239647811426173e-06, "loss": 0.7844, "step": 4634 }, { "epoch": 0.67, "grad_norm": 5.389059066772461, "learning_rate": 1.823875081419552e-06, "loss": 0.7341, "step": 4635 }, { "epoch": 0.67, "grad_norm": 6.073662757873535, "learning_rate": 1.8237853610556356e-06, "loss": 0.7216, "step": 4636 }, { "epoch": 0.67, "grad_norm": 5.438852787017822, "learning_rate": 1.8236956200531153e-06, "loss": 0.7246, "step": 4637 }, { "epoch": 0.67, "grad_norm": 5.671800136566162, "learning_rate": 1.8236058584142396e-06, "loss": 0.6697, "step": 4638 }, { "epoch": 0.67, "grad_norm": 5.904508590698242, "learning_rate": 1.8235160761412574e-06, "loss": 0.7337, "step": 4639 }, { "epoch": 0.67, "grad_norm": 5.901854038238525, "learning_rate": 1.8234262732364178e-06, "loss": 0.8588, "step": 4640 }, { "epoch": 0.67, "grad_norm": 5.970644474029541, "learning_rate": 1.8233364497019709e-06, "loss": 0.7367, "step": 4641 }, { "epoch": 0.67, "grad_norm": 6.452365398406982, "learning_rate": 1.823246605540167e-06, "loss": 0.7727, "step": 4642 }, { "epoch": 0.67, "grad_norm": 5.298095703125, "learning_rate": 1.8231567407532572e-06, "loss": 0.6919, "step": 4643 }, { "epoch": 0.67, "grad_norm": 6.319982528686523, "learning_rate": 1.8230668553434927e-06, "loss": 0.8055, "step": 4644 }, { "epoch": 0.67, "grad_norm": 5.16929292678833, "learning_rate": 1.8229769493131255e-06, "loss": 0.7109, "step": 4645 }, { "epoch": 0.67, "grad_norm": 5.278615474700928, "learning_rate": 1.822887022664408e-06, "loss": 0.6639, "step": 4646 }, { "epoch": 0.67, "grad_norm": 5.157205104827881, "learning_rate": 1.8227970753995931e-06, "loss": 0.6432, "step": 4647 }, { "epoch": 0.67, "grad_norm": 5.908945083618164, "learning_rate": 1.8227071075209344e-06, "loss": 0.7262, "step": 4648 }, { "epoch": 0.67, "grad_norm": 5.734438896179199, "learning_rate": 1.8226171190306858e-06, "loss": 0.6986, "step": 4649 }, { "epoch": 0.67, "grad_norm": 5.275855541229248, "learning_rate": 1.8225271099311023e-06, "loss": 0.6423, "step": 4650 }, { "epoch": 0.67, "grad_norm": 5.649593353271484, "learning_rate": 1.8224370802244383e-06, "loss": 0.7265, "step": 4651 }, { "epoch": 0.67, "grad_norm": 5.792106628417969, "learning_rate": 1.8223470299129496e-06, "loss": 0.7502, "step": 4652 }, { "epoch": 0.68, "grad_norm": 6.309348106384277, "learning_rate": 1.822256958998892e-06, "loss": 0.7499, "step": 4653 }, { "epoch": 0.68, "grad_norm": 5.541864395141602, "learning_rate": 1.8221668674845226e-06, "loss": 0.6457, "step": 4654 }, { "epoch": 0.68, "grad_norm": 5.815029144287109, "learning_rate": 1.8220767553720981e-06, "loss": 0.6277, "step": 4655 }, { "epoch": 0.68, "grad_norm": 5.5988688468933105, "learning_rate": 1.8219866226638763e-06, "loss": 0.6371, "step": 4656 }, { "epoch": 0.68, "grad_norm": 5.440144062042236, "learning_rate": 1.8218964693621151e-06, "loss": 0.7873, "step": 4657 }, { "epoch": 0.68, "grad_norm": 6.166650772094727, "learning_rate": 1.8218062954690734e-06, "loss": 0.7243, "step": 4658 }, { "epoch": 0.68, "grad_norm": 5.798358917236328, "learning_rate": 1.82171610098701e-06, "loss": 0.7229, "step": 4659 }, { "epoch": 0.68, "grad_norm": 6.0390825271606445, "learning_rate": 1.8216258859181855e-06, "loss": 0.6745, "step": 4660 }, { "epoch": 0.68, "grad_norm": 6.176283359527588, "learning_rate": 1.821535650264859e-06, "loss": 0.7694, "step": 4661 }, { "epoch": 0.68, "grad_norm": 6.074918746948242, "learning_rate": 1.8214453940292916e-06, "loss": 0.7424, "step": 4662 }, { "epoch": 0.68, "grad_norm": 5.133482456207275, "learning_rate": 1.8213551172137445e-06, "loss": 0.6488, "step": 4663 }, { "epoch": 0.68, "grad_norm": 6.380675792694092, "learning_rate": 1.8212648198204797e-06, "loss": 0.6593, "step": 4664 }, { "epoch": 0.68, "grad_norm": 5.732356548309326, "learning_rate": 1.8211745018517591e-06, "loss": 0.7543, "step": 4665 }, { "epoch": 0.68, "grad_norm": 5.937206745147705, "learning_rate": 1.821084163309846e-06, "loss": 0.6966, "step": 4666 }, { "epoch": 0.68, "grad_norm": 6.364903450012207, "learning_rate": 1.8209938041970027e-06, "loss": 0.733, "step": 4667 }, { "epoch": 0.68, "grad_norm": 6.289332389831543, "learning_rate": 1.8209034245154942e-06, "loss": 0.8119, "step": 4668 }, { "epoch": 0.68, "grad_norm": 6.253585338592529, "learning_rate": 1.8208130242675837e-06, "loss": 0.7308, "step": 4669 }, { "epoch": 0.68, "grad_norm": 6.1479268074035645, "learning_rate": 1.820722603455537e-06, "loss": 0.7665, "step": 4670 }, { "epoch": 0.68, "grad_norm": 6.238118648529053, "learning_rate": 1.8206321620816188e-06, "loss": 0.7855, "step": 4671 }, { "epoch": 0.68, "grad_norm": 6.250890731811523, "learning_rate": 1.820541700148095e-06, "loss": 0.8422, "step": 4672 }, { "epoch": 0.68, "grad_norm": 5.733734130859375, "learning_rate": 1.8204512176572325e-06, "loss": 0.7163, "step": 4673 }, { "epoch": 0.68, "grad_norm": 5.637238502502441, "learning_rate": 1.8203607146112975e-06, "loss": 0.7498, "step": 4674 }, { "epoch": 0.68, "grad_norm": 5.7988762855529785, "learning_rate": 1.820270191012558e-06, "loss": 0.7771, "step": 4675 }, { "epoch": 0.68, "grad_norm": 5.68266487121582, "learning_rate": 1.8201796468632816e-06, "loss": 0.767, "step": 4676 }, { "epoch": 0.68, "grad_norm": 5.500659465789795, "learning_rate": 1.8200890821657365e-06, "loss": 0.7258, "step": 4677 }, { "epoch": 0.68, "grad_norm": 6.293189525604248, "learning_rate": 1.8199984969221927e-06, "loss": 0.6478, "step": 4678 }, { "epoch": 0.68, "grad_norm": 6.047382354736328, "learning_rate": 1.8199078911349182e-06, "loss": 0.7406, "step": 4679 }, { "epoch": 0.68, "grad_norm": 6.985960006713867, "learning_rate": 1.819817264806184e-06, "loss": 0.8209, "step": 4680 }, { "epoch": 0.68, "grad_norm": 6.155553340911865, "learning_rate": 1.8197266179382603e-06, "loss": 0.7094, "step": 4681 }, { "epoch": 0.68, "grad_norm": 6.010833263397217, "learning_rate": 1.819635950533418e-06, "loss": 0.7788, "step": 4682 }, { "epoch": 0.68, "grad_norm": 5.89796781539917, "learning_rate": 1.8195452625939287e-06, "loss": 0.7022, "step": 4683 }, { "epoch": 0.68, "grad_norm": 5.924938678741455, "learning_rate": 1.8194545541220644e-06, "loss": 0.7715, "step": 4684 }, { "epoch": 0.68, "grad_norm": 5.576960563659668, "learning_rate": 1.819363825120098e-06, "loss": 0.6623, "step": 4685 }, { "epoch": 0.68, "grad_norm": 5.337643146514893, "learning_rate": 1.819273075590302e-06, "loss": 0.7646, "step": 4686 }, { "epoch": 0.68, "grad_norm": 5.3154778480529785, "learning_rate": 1.8191823055349504e-06, "loss": 0.6422, "step": 4687 }, { "epoch": 0.68, "grad_norm": 5.693981170654297, "learning_rate": 1.8190915149563174e-06, "loss": 0.6315, "step": 4688 }, { "epoch": 0.68, "grad_norm": 5.461365222930908, "learning_rate": 1.8190007038566768e-06, "loss": 0.7026, "step": 4689 }, { "epoch": 0.68, "grad_norm": 5.740765571594238, "learning_rate": 1.8189098722383048e-06, "loss": 0.7155, "step": 4690 }, { "epoch": 0.68, "grad_norm": 5.261455535888672, "learning_rate": 1.8188190201034765e-06, "loss": 0.7099, "step": 4691 }, { "epoch": 0.68, "grad_norm": 5.566002368927002, "learning_rate": 1.8187281474544679e-06, "loss": 0.7224, "step": 4692 }, { "epoch": 0.68, "grad_norm": 5.821627616882324, "learning_rate": 1.8186372542935559e-06, "loss": 0.7421, "step": 4693 }, { "epoch": 0.68, "grad_norm": 6.004394054412842, "learning_rate": 1.8185463406230177e-06, "loss": 0.665, "step": 4694 }, { "epoch": 0.68, "grad_norm": 6.167173385620117, "learning_rate": 1.818455406445131e-06, "loss": 0.7324, "step": 4695 }, { "epoch": 0.68, "grad_norm": 6.1900482177734375, "learning_rate": 1.8183644517621737e-06, "loss": 0.747, "step": 4696 }, { "epoch": 0.68, "grad_norm": 5.7686543464660645, "learning_rate": 1.818273476576425e-06, "loss": 0.7517, "step": 4697 }, { "epoch": 0.68, "grad_norm": 5.631824493408203, "learning_rate": 1.8181824808901639e-06, "loss": 0.7776, "step": 4698 }, { "epoch": 0.68, "grad_norm": 5.286324501037598, "learning_rate": 1.8180914647056699e-06, "loss": 0.6896, "step": 4699 }, { "epoch": 0.68, "grad_norm": 5.931482791900635, "learning_rate": 1.8180004280252237e-06, "loss": 0.7157, "step": 4700 }, { "epoch": 0.68, "grad_norm": 4.941140651702881, "learning_rate": 1.817909370851106e-06, "loss": 0.6274, "step": 4701 }, { "epoch": 0.68, "grad_norm": 6.059906959533691, "learning_rate": 1.8178182931855978e-06, "loss": 0.8089, "step": 4702 }, { "epoch": 0.68, "grad_norm": 5.0327887535095215, "learning_rate": 1.8177271950309811e-06, "loss": 0.7274, "step": 4703 }, { "epoch": 0.68, "grad_norm": 5.543360233306885, "learning_rate": 1.817636076389538e-06, "loss": 0.7125, "step": 4704 }, { "epoch": 0.68, "grad_norm": 5.514522075653076, "learning_rate": 1.8175449372635518e-06, "loss": 0.776, "step": 4705 }, { "epoch": 0.68, "grad_norm": 5.235450267791748, "learning_rate": 1.8174537776553057e-06, "loss": 0.6956, "step": 4706 }, { "epoch": 0.68, "grad_norm": 5.2289204597473145, "learning_rate": 1.817362597567083e-06, "loss": 0.669, "step": 4707 }, { "epoch": 0.68, "grad_norm": 5.185337543487549, "learning_rate": 1.817271397001169e-06, "loss": 0.6587, "step": 4708 }, { "epoch": 0.68, "grad_norm": 5.5828471183776855, "learning_rate": 1.8171801759598475e-06, "loss": 0.7235, "step": 4709 }, { "epoch": 0.68, "grad_norm": 5.570592880249023, "learning_rate": 1.817088934445405e-06, "loss": 0.6651, "step": 4710 }, { "epoch": 0.68, "grad_norm": 6.493209362030029, "learning_rate": 1.8169976724601267e-06, "loss": 0.6579, "step": 4711 }, { "epoch": 0.68, "grad_norm": 5.75732421875, "learning_rate": 1.8169063900062992e-06, "loss": 0.6931, "step": 4712 }, { "epoch": 0.68, "grad_norm": 6.178171634674072, "learning_rate": 1.8168150870862092e-06, "loss": 0.7104, "step": 4713 }, { "epoch": 0.68, "grad_norm": 5.565242290496826, "learning_rate": 1.8167237637021444e-06, "loss": 0.7698, "step": 4714 }, { "epoch": 0.68, "grad_norm": 5.635687828063965, "learning_rate": 1.816632419856393e-06, "loss": 0.7537, "step": 4715 }, { "epoch": 0.68, "grad_norm": 5.344938278198242, "learning_rate": 1.816541055551243e-06, "loss": 0.7377, "step": 4716 }, { "epoch": 0.68, "grad_norm": 5.227280616760254, "learning_rate": 1.8164496707889837e-06, "loss": 0.7458, "step": 4717 }, { "epoch": 0.68, "grad_norm": 5.5485639572143555, "learning_rate": 1.8163582655719046e-06, "loss": 0.7436, "step": 4718 }, { "epoch": 0.68, "grad_norm": 5.089516639709473, "learning_rate": 1.8162668399022952e-06, "loss": 0.702, "step": 4719 }, { "epoch": 0.68, "grad_norm": 5.488312721252441, "learning_rate": 1.816175393782447e-06, "loss": 0.6734, "step": 4720 }, { "epoch": 0.68, "grad_norm": 6.130244731903076, "learning_rate": 1.8160839272146501e-06, "loss": 0.6277, "step": 4721 }, { "epoch": 0.69, "grad_norm": 5.639150142669678, "learning_rate": 1.8159924402011963e-06, "loss": 0.7295, "step": 4722 }, { "epoch": 0.69, "grad_norm": 5.675781726837158, "learning_rate": 1.8159009327443781e-06, "loss": 0.7616, "step": 4723 }, { "epoch": 0.69, "grad_norm": 5.7881293296813965, "learning_rate": 1.8158094048464875e-06, "loss": 0.7675, "step": 4724 }, { "epoch": 0.69, "grad_norm": 5.507203102111816, "learning_rate": 1.815717856509818e-06, "loss": 0.7773, "step": 4725 }, { "epoch": 0.69, "grad_norm": 6.2479143142700195, "learning_rate": 1.8156262877366629e-06, "loss": 0.7839, "step": 4726 }, { "epoch": 0.69, "grad_norm": 5.929621696472168, "learning_rate": 1.815534698529316e-06, "loss": 0.6571, "step": 4727 }, { "epoch": 0.69, "grad_norm": 5.9321465492248535, "learning_rate": 1.8154430888900726e-06, "loss": 0.7489, "step": 4728 }, { "epoch": 0.69, "grad_norm": 5.444230556488037, "learning_rate": 1.815351458821228e-06, "loss": 0.6796, "step": 4729 }, { "epoch": 0.69, "grad_norm": 5.567171096801758, "learning_rate": 1.815259808325077e-06, "loss": 0.6715, "step": 4730 }, { "epoch": 0.69, "grad_norm": 5.95755672454834, "learning_rate": 1.8151681374039163e-06, "loss": 0.7526, "step": 4731 }, { "epoch": 0.69, "grad_norm": 5.261410236358643, "learning_rate": 1.8150764460600425e-06, "loss": 0.6455, "step": 4732 }, { "epoch": 0.69, "grad_norm": 5.345211505889893, "learning_rate": 1.8149847342957524e-06, "loss": 0.7166, "step": 4733 }, { "epoch": 0.69, "grad_norm": 5.8147053718566895, "learning_rate": 1.8148930021133442e-06, "loss": 0.8122, "step": 4734 }, { "epoch": 0.69, "grad_norm": 6.316220283508301, "learning_rate": 1.8148012495151156e-06, "loss": 0.7796, "step": 4735 }, { "epoch": 0.69, "grad_norm": 5.6161932945251465, "learning_rate": 1.814709476503366e-06, "loss": 0.7791, "step": 4736 }, { "epoch": 0.69, "grad_norm": 5.4837260246276855, "learning_rate": 1.814617683080394e-06, "loss": 0.7147, "step": 4737 }, { "epoch": 0.69, "grad_norm": 5.301862716674805, "learning_rate": 1.8145258692484999e-06, "loss": 0.7324, "step": 4738 }, { "epoch": 0.69, "grad_norm": 6.261810779571533, "learning_rate": 1.8144340350099832e-06, "loss": 0.6867, "step": 4739 }, { "epoch": 0.69, "grad_norm": 5.229172229766846, "learning_rate": 1.8143421803671452e-06, "loss": 0.6557, "step": 4740 }, { "epoch": 0.69, "grad_norm": 5.596210479736328, "learning_rate": 1.8142503053222873e-06, "loss": 0.8362, "step": 4741 }, { "epoch": 0.69, "grad_norm": 6.310554504394531, "learning_rate": 1.8141584098777107e-06, "loss": 0.7291, "step": 4742 }, { "epoch": 0.69, "grad_norm": 5.20488977432251, "learning_rate": 1.814066494035718e-06, "loss": 0.7704, "step": 4743 }, { "epoch": 0.69, "grad_norm": 5.4916887283325195, "learning_rate": 1.8139745577986121e-06, "loss": 0.7706, "step": 4744 }, { "epoch": 0.69, "grad_norm": 5.517752647399902, "learning_rate": 1.8138826011686964e-06, "loss": 0.6963, "step": 4745 }, { "epoch": 0.69, "grad_norm": 5.276562690734863, "learning_rate": 1.8137906241482744e-06, "loss": 0.6683, "step": 4746 }, { "epoch": 0.69, "grad_norm": 5.726274490356445, "learning_rate": 1.8136986267396503e-06, "loss": 0.7988, "step": 4747 }, { "epoch": 0.69, "grad_norm": 6.030703067779541, "learning_rate": 1.8136066089451297e-06, "loss": 0.7457, "step": 4748 }, { "epoch": 0.69, "grad_norm": 5.991140365600586, "learning_rate": 1.8135145707670171e-06, "loss": 0.8247, "step": 4749 }, { "epoch": 0.69, "grad_norm": 5.705694198608398, "learning_rate": 1.813422512207619e-06, "loss": 0.6389, "step": 4750 }, { "epoch": 0.69, "grad_norm": 4.930497646331787, "learning_rate": 1.8133304332692413e-06, "loss": 0.6355, "step": 4751 }, { "epoch": 0.69, "grad_norm": 6.384420394897461, "learning_rate": 1.8132383339541914e-06, "loss": 0.7963, "step": 4752 }, { "epoch": 0.69, "grad_norm": 5.8491644859313965, "learning_rate": 1.8131462142647763e-06, "loss": 0.7197, "step": 4753 }, { "epoch": 0.69, "grad_norm": 5.696547031402588, "learning_rate": 1.8130540742033041e-06, "loss": 0.7272, "step": 4754 }, { "epoch": 0.69, "grad_norm": 6.096805572509766, "learning_rate": 1.812961913772083e-06, "loss": 0.6846, "step": 4755 }, { "epoch": 0.69, "grad_norm": 5.530910015106201, "learning_rate": 1.812869732973422e-06, "loss": 0.6635, "step": 4756 }, { "epoch": 0.69, "grad_norm": 5.97659158706665, "learning_rate": 1.812777531809631e-06, "loss": 0.696, "step": 4757 }, { "epoch": 0.69, "grad_norm": 5.75151252746582, "learning_rate": 1.8126853102830192e-06, "loss": 0.7397, "step": 4758 }, { "epoch": 0.69, "grad_norm": 5.630131244659424, "learning_rate": 1.8125930683958976e-06, "loss": 0.6974, "step": 4759 }, { "epoch": 0.69, "grad_norm": 5.423230171203613, "learning_rate": 1.8125008061505768e-06, "loss": 0.6323, "step": 4760 }, { "epoch": 0.69, "grad_norm": 5.585533618927002, "learning_rate": 1.8124085235493687e-06, "loss": 0.6511, "step": 4761 }, { "epoch": 0.69, "grad_norm": 5.8863959312438965, "learning_rate": 1.8123162205945851e-06, "loss": 0.8532, "step": 4762 }, { "epoch": 0.69, "grad_norm": 6.181491374969482, "learning_rate": 1.8122238972885383e-06, "loss": 0.8477, "step": 4763 }, { "epoch": 0.69, "grad_norm": 5.01619291305542, "learning_rate": 1.8121315536335415e-06, "loss": 0.734, "step": 4764 }, { "epoch": 0.69, "grad_norm": 5.841496467590332, "learning_rate": 1.8120391896319083e-06, "loss": 0.681, "step": 4765 }, { "epoch": 0.69, "grad_norm": 5.631802558898926, "learning_rate": 1.8119468052859524e-06, "loss": 0.6586, "step": 4766 }, { "epoch": 0.69, "grad_norm": 4.890739917755127, "learning_rate": 1.8118544005979887e-06, "loss": 0.6926, "step": 4767 }, { "epoch": 0.69, "grad_norm": 5.3022685050964355, "learning_rate": 1.8117619755703323e-06, "loss": 0.7064, "step": 4768 }, { "epoch": 0.69, "grad_norm": 5.088233947753906, "learning_rate": 1.8116695302052985e-06, "loss": 0.739, "step": 4769 }, { "epoch": 0.69, "grad_norm": 5.9384894371032715, "learning_rate": 1.811577064505203e-06, "loss": 0.7664, "step": 4770 }, { "epoch": 0.69, "grad_norm": 6.092119216918945, "learning_rate": 1.8114845784723633e-06, "loss": 0.752, "step": 4771 }, { "epoch": 0.69, "grad_norm": 5.998546600341797, "learning_rate": 1.8113920721090961e-06, "loss": 0.6554, "step": 4772 }, { "epoch": 0.69, "grad_norm": 5.808728218078613, "learning_rate": 1.8112995454177186e-06, "loss": 0.7992, "step": 4773 }, { "epoch": 0.69, "grad_norm": 5.373214244842529, "learning_rate": 1.8112069984005495e-06, "loss": 0.7104, "step": 4774 }, { "epoch": 0.69, "grad_norm": 6.666818618774414, "learning_rate": 1.8111144310599069e-06, "loss": 0.7671, "step": 4775 }, { "epoch": 0.69, "grad_norm": 5.89195442199707, "learning_rate": 1.8110218433981102e-06, "loss": 0.6452, "step": 4776 }, { "epoch": 0.69, "grad_norm": 5.928396224975586, "learning_rate": 1.810929235417479e-06, "loss": 0.7747, "step": 4777 }, { "epoch": 0.69, "grad_norm": 6.121983051300049, "learning_rate": 1.8108366071203333e-06, "loss": 0.6702, "step": 4778 }, { "epoch": 0.69, "grad_norm": 5.636041164398193, "learning_rate": 1.8107439585089938e-06, "loss": 0.7353, "step": 4779 }, { "epoch": 0.69, "grad_norm": 4.984182834625244, "learning_rate": 1.810651289585782e-06, "loss": 0.6145, "step": 4780 }, { "epoch": 0.69, "grad_norm": 5.6525492668151855, "learning_rate": 1.8105586003530197e-06, "loss": 0.6926, "step": 4781 }, { "epoch": 0.69, "grad_norm": 5.710177421569824, "learning_rate": 1.810465890813028e-06, "loss": 0.6906, "step": 4782 }, { "epoch": 0.69, "grad_norm": 5.600845813751221, "learning_rate": 1.8103731609681306e-06, "loss": 0.7348, "step": 4783 }, { "epoch": 0.69, "grad_norm": 5.56523323059082, "learning_rate": 1.81028041082065e-06, "loss": 0.7919, "step": 4784 }, { "epoch": 0.69, "grad_norm": 5.474453449249268, "learning_rate": 1.8101876403729106e-06, "loss": 0.6618, "step": 4785 }, { "epoch": 0.69, "grad_norm": 6.206287860870361, "learning_rate": 1.8100948496272363e-06, "loss": 0.7574, "step": 4786 }, { "epoch": 0.69, "grad_norm": 5.326762676239014, "learning_rate": 1.8100020385859516e-06, "loss": 0.7278, "step": 4787 }, { "epoch": 0.69, "grad_norm": 5.825016021728516, "learning_rate": 1.809909207251382e-06, "loss": 0.7277, "step": 4788 }, { "epoch": 0.69, "grad_norm": 6.216651916503906, "learning_rate": 1.8098163556258535e-06, "loss": 0.687, "step": 4789 }, { "epoch": 0.7, "grad_norm": 6.442729949951172, "learning_rate": 1.8097234837116917e-06, "loss": 0.8036, "step": 4790 }, { "epoch": 0.7, "grad_norm": 5.244536876678467, "learning_rate": 1.8096305915112237e-06, "loss": 0.683, "step": 4791 }, { "epoch": 0.7, "grad_norm": 5.711132049560547, "learning_rate": 1.8095376790267768e-06, "loss": 0.7587, "step": 4792 }, { "epoch": 0.7, "grad_norm": 5.7151336669921875, "learning_rate": 1.8094447462606787e-06, "loss": 0.7186, "step": 4793 }, { "epoch": 0.7, "grad_norm": 5.678067684173584, "learning_rate": 1.8093517932152577e-06, "loss": 0.7058, "step": 4794 }, { "epoch": 0.7, "grad_norm": 5.186387538909912, "learning_rate": 1.8092588198928426e-06, "loss": 0.7128, "step": 4795 }, { "epoch": 0.7, "grad_norm": 5.252081871032715, "learning_rate": 1.8091658262957628e-06, "loss": 0.7161, "step": 4796 }, { "epoch": 0.7, "grad_norm": 5.329916000366211, "learning_rate": 1.809072812426348e-06, "loss": 0.7073, "step": 4797 }, { "epoch": 0.7, "grad_norm": 5.674922943115234, "learning_rate": 1.808979778286928e-06, "loss": 0.8006, "step": 4798 }, { "epoch": 0.7, "grad_norm": 5.863002300262451, "learning_rate": 1.8088867238798345e-06, "loss": 0.6531, "step": 4799 }, { "epoch": 0.7, "grad_norm": 5.258959770202637, "learning_rate": 1.8087936492073984e-06, "loss": 0.6199, "step": 4800 }, { "epoch": 0.7, "grad_norm": 6.3671698570251465, "learning_rate": 1.8087005542719516e-06, "loss": 0.7458, "step": 4801 }, { "epoch": 0.7, "grad_norm": 6.0491437911987305, "learning_rate": 1.8086074390758263e-06, "loss": 0.7087, "step": 4802 }, { "epoch": 0.7, "grad_norm": 6.314694881439209, "learning_rate": 1.8085143036213554e-06, "loss": 0.6828, "step": 4803 }, { "epoch": 0.7, "grad_norm": 5.22830867767334, "learning_rate": 1.8084211479108724e-06, "loss": 0.6625, "step": 4804 }, { "epoch": 0.7, "grad_norm": 5.33031702041626, "learning_rate": 1.8083279719467112e-06, "loss": 0.7216, "step": 4805 }, { "epoch": 0.7, "grad_norm": 5.726405143737793, "learning_rate": 1.808234775731206e-06, "loss": 0.7363, "step": 4806 }, { "epoch": 0.7, "grad_norm": 5.707812309265137, "learning_rate": 1.8081415592666917e-06, "loss": 0.7663, "step": 4807 }, { "epoch": 0.7, "grad_norm": 5.241118431091309, "learning_rate": 1.808048322555504e-06, "loss": 0.6993, "step": 4808 }, { "epoch": 0.7, "grad_norm": 6.058234691619873, "learning_rate": 1.807955065599978e-06, "loss": 0.7368, "step": 4809 }, { "epoch": 0.7, "grad_norm": 5.369960784912109, "learning_rate": 1.807861788402451e-06, "loss": 0.759, "step": 4810 }, { "epoch": 0.7, "grad_norm": 6.085034370422363, "learning_rate": 1.8077684909652594e-06, "loss": 0.7126, "step": 4811 }, { "epoch": 0.7, "grad_norm": 5.648503303527832, "learning_rate": 1.8076751732907408e-06, "loss": 0.6997, "step": 4812 }, { "epoch": 0.7, "grad_norm": 5.290973663330078, "learning_rate": 1.807581835381233e-06, "loss": 0.6986, "step": 4813 }, { "epoch": 0.7, "grad_norm": 5.390271186828613, "learning_rate": 1.8074884772390743e-06, "loss": 0.6841, "step": 4814 }, { "epoch": 0.7, "grad_norm": 5.378775119781494, "learning_rate": 1.8073950988666043e-06, "loss": 0.8306, "step": 4815 }, { "epoch": 0.7, "grad_norm": 5.853006839752197, "learning_rate": 1.8073017002661617e-06, "loss": 0.7765, "step": 4816 }, { "epoch": 0.7, "grad_norm": 5.832183837890625, "learning_rate": 1.8072082814400869e-06, "loss": 0.7633, "step": 4817 }, { "epoch": 0.7, "grad_norm": 5.947519302368164, "learning_rate": 1.8071148423907197e-06, "loss": 0.7669, "step": 4818 }, { "epoch": 0.7, "grad_norm": 5.255170822143555, "learning_rate": 1.807021383120402e-06, "loss": 0.6495, "step": 4819 }, { "epoch": 0.7, "grad_norm": 5.6110053062438965, "learning_rate": 1.8069279036314748e-06, "loss": 0.7504, "step": 4820 }, { "epoch": 0.7, "grad_norm": 5.227149486541748, "learning_rate": 1.8068344039262796e-06, "loss": 0.6351, "step": 4821 }, { "epoch": 0.7, "grad_norm": 5.358731746673584, "learning_rate": 1.80674088400716e-06, "loss": 0.7193, "step": 4822 }, { "epoch": 0.7, "grad_norm": 6.680231094360352, "learning_rate": 1.8066473438764583e-06, "loss": 0.7448, "step": 4823 }, { "epoch": 0.7, "grad_norm": 5.255885601043701, "learning_rate": 1.8065537835365178e-06, "loss": 0.7339, "step": 4824 }, { "epoch": 0.7, "grad_norm": 5.864251136779785, "learning_rate": 1.8064602029896829e-06, "loss": 0.6203, "step": 4825 }, { "epoch": 0.7, "grad_norm": 5.524731159210205, "learning_rate": 1.806366602238298e-06, "loss": 0.7856, "step": 4826 }, { "epoch": 0.7, "grad_norm": 5.673464775085449, "learning_rate": 1.806272981284708e-06, "loss": 0.7025, "step": 4827 }, { "epoch": 0.7, "grad_norm": 5.5304765701293945, "learning_rate": 1.8061793401312583e-06, "loss": 0.7168, "step": 4828 }, { "epoch": 0.7, "grad_norm": 4.8704633712768555, "learning_rate": 1.8060856787802955e-06, "loss": 0.6484, "step": 4829 }, { "epoch": 0.7, "grad_norm": 5.581552982330322, "learning_rate": 1.8059919972341659e-06, "loss": 0.6284, "step": 4830 }, { "epoch": 0.7, "grad_norm": 6.500818252563477, "learning_rate": 1.805898295495216e-06, "loss": 0.7773, "step": 4831 }, { "epoch": 0.7, "grad_norm": 5.4246697425842285, "learning_rate": 1.8058045735657938e-06, "loss": 0.6997, "step": 4832 }, { "epoch": 0.7, "grad_norm": 5.175662040710449, "learning_rate": 1.8057108314482474e-06, "loss": 0.6869, "step": 4833 }, { "epoch": 0.7, "grad_norm": 5.8080902099609375, "learning_rate": 1.8056170691449255e-06, "loss": 0.7061, "step": 4834 }, { "epoch": 0.7, "grad_norm": 5.894378662109375, "learning_rate": 1.8055232866581767e-06, "loss": 0.7206, "step": 4835 }, { "epoch": 0.7, "grad_norm": 5.895325183868408, "learning_rate": 1.8054294839903505e-06, "loss": 0.7769, "step": 4836 }, { "epoch": 0.7, "grad_norm": 5.688344478607178, "learning_rate": 1.8053356611437978e-06, "loss": 0.7171, "step": 4837 }, { "epoch": 0.7, "grad_norm": 5.491659164428711, "learning_rate": 1.8052418181208684e-06, "loss": 0.7435, "step": 4838 }, { "epoch": 0.7, "grad_norm": 5.40387487411499, "learning_rate": 1.8051479549239134e-06, "loss": 0.774, "step": 4839 }, { "epoch": 0.7, "grad_norm": 5.2506327629089355, "learning_rate": 1.8050540715552852e-06, "loss": 0.7199, "step": 4840 }, { "epoch": 0.7, "grad_norm": 4.885374546051025, "learning_rate": 1.804960168017335e-06, "loss": 0.6848, "step": 4841 }, { "epoch": 0.7, "grad_norm": 5.443251609802246, "learning_rate": 1.8048662443124156e-06, "loss": 0.693, "step": 4842 }, { "epoch": 0.7, "grad_norm": 5.758761405944824, "learning_rate": 1.80477230044288e-06, "loss": 0.7314, "step": 4843 }, { "epoch": 0.7, "grad_norm": 5.648247718811035, "learning_rate": 1.8046783364110825e-06, "loss": 0.7124, "step": 4844 }, { "epoch": 0.7, "grad_norm": 5.454793453216553, "learning_rate": 1.8045843522193765e-06, "loss": 0.6547, "step": 4845 }, { "epoch": 0.7, "grad_norm": 6.411502361297607, "learning_rate": 1.804490347870117e-06, "loss": 0.553, "step": 4846 }, { "epoch": 0.7, "grad_norm": 5.3993144035339355, "learning_rate": 1.804396323365659e-06, "loss": 0.661, "step": 4847 }, { "epoch": 0.7, "grad_norm": 6.014028549194336, "learning_rate": 1.804302278708358e-06, "loss": 0.687, "step": 4848 }, { "epoch": 0.7, "grad_norm": 5.588746547698975, "learning_rate": 1.8042082139005702e-06, "loss": 0.731, "step": 4849 }, { "epoch": 0.7, "grad_norm": 5.909189224243164, "learning_rate": 1.8041141289446525e-06, "loss": 0.7063, "step": 4850 }, { "epoch": 0.7, "grad_norm": 6.431712627410889, "learning_rate": 1.8040200238429618e-06, "loss": 0.6599, "step": 4851 }, { "epoch": 0.7, "grad_norm": 5.685461044311523, "learning_rate": 1.8039258985978557e-06, "loss": 0.6223, "step": 4852 }, { "epoch": 0.7, "grad_norm": 5.704196929931641, "learning_rate": 1.8038317532116925e-06, "loss": 0.713, "step": 4853 }, { "epoch": 0.7, "grad_norm": 5.2347636222839355, "learning_rate": 1.803737587686831e-06, "loss": 0.69, "step": 4854 }, { "epoch": 0.7, "grad_norm": 5.8585591316223145, "learning_rate": 1.8036434020256296e-06, "loss": 0.79, "step": 4855 }, { "epoch": 0.7, "grad_norm": 5.782169818878174, "learning_rate": 1.8035491962304492e-06, "loss": 0.7128, "step": 4856 }, { "epoch": 0.7, "grad_norm": 5.569436073303223, "learning_rate": 1.803454970303649e-06, "loss": 0.6972, "step": 4857 }, { "epoch": 0.7, "grad_norm": 5.727677345275879, "learning_rate": 1.80336072424759e-06, "loss": 0.6994, "step": 4858 }, { "epoch": 0.71, "grad_norm": 5.476333141326904, "learning_rate": 1.8032664580646335e-06, "loss": 0.7672, "step": 4859 }, { "epoch": 0.71, "grad_norm": 5.506664276123047, "learning_rate": 1.8031721717571408e-06, "loss": 0.7316, "step": 4860 }, { "epoch": 0.71, "grad_norm": 5.6160478591918945, "learning_rate": 1.803077865327475e-06, "loss": 0.7106, "step": 4861 }, { "epoch": 0.71, "grad_norm": 5.7201247215271, "learning_rate": 1.8029835387779974e-06, "loss": 0.786, "step": 4862 }, { "epoch": 0.71, "grad_norm": 6.184265613555908, "learning_rate": 1.8028891921110722e-06, "loss": 0.7797, "step": 4863 }, { "epoch": 0.71, "grad_norm": 5.863467216491699, "learning_rate": 1.802794825329063e-06, "loss": 0.8588, "step": 4864 }, { "epoch": 0.71, "grad_norm": 5.355701923370361, "learning_rate": 1.8027004384343337e-06, "loss": 0.7488, "step": 4865 }, { "epoch": 0.71, "grad_norm": 6.380679607391357, "learning_rate": 1.8026060314292493e-06, "loss": 0.7453, "step": 4866 }, { "epoch": 0.71, "grad_norm": 5.222498893737793, "learning_rate": 1.8025116043161748e-06, "loss": 0.6588, "step": 4867 }, { "epoch": 0.71, "grad_norm": 5.498847484588623, "learning_rate": 1.8024171570974759e-06, "loss": 0.7235, "step": 4868 }, { "epoch": 0.71, "grad_norm": 5.211403846740723, "learning_rate": 1.8023226897755191e-06, "loss": 0.6815, "step": 4869 }, { "epoch": 0.71, "grad_norm": 4.978761672973633, "learning_rate": 1.802228202352671e-06, "loss": 0.6371, "step": 4870 }, { "epoch": 0.71, "grad_norm": 5.166091442108154, "learning_rate": 1.8021336948312986e-06, "loss": 0.7465, "step": 4871 }, { "epoch": 0.71, "grad_norm": 5.658570289611816, "learning_rate": 1.8020391672137699e-06, "loss": 0.6974, "step": 4872 }, { "epoch": 0.71, "grad_norm": 5.6060686111450195, "learning_rate": 1.801944619502453e-06, "loss": 0.7378, "step": 4873 }, { "epoch": 0.71, "grad_norm": 5.361217498779297, "learning_rate": 1.8018500516997164e-06, "loss": 0.7088, "step": 4874 }, { "epoch": 0.71, "grad_norm": 5.4558820724487305, "learning_rate": 1.8017554638079303e-06, "loss": 0.7132, "step": 4875 }, { "epoch": 0.71, "grad_norm": 5.207467555999756, "learning_rate": 1.8016608558294632e-06, "loss": 0.6676, "step": 4876 }, { "epoch": 0.71, "grad_norm": 5.5212225914001465, "learning_rate": 1.8015662277666862e-06, "loss": 0.732, "step": 4877 }, { "epoch": 0.71, "grad_norm": 6.341423034667969, "learning_rate": 1.8014715796219695e-06, "loss": 0.6758, "step": 4878 }, { "epoch": 0.71, "grad_norm": 5.976088523864746, "learning_rate": 1.8013769113976847e-06, "loss": 0.7666, "step": 4879 }, { "epoch": 0.71, "grad_norm": 6.15301513671875, "learning_rate": 1.8012822230962037e-06, "loss": 0.7303, "step": 4880 }, { "epoch": 0.71, "grad_norm": 5.678267002105713, "learning_rate": 1.8011875147198982e-06, "loss": 0.6743, "step": 4881 }, { "epoch": 0.71, "grad_norm": 4.968625068664551, "learning_rate": 1.8010927862711417e-06, "loss": 0.64, "step": 4882 }, { "epoch": 0.71, "grad_norm": 6.288710117340088, "learning_rate": 1.8009980377523069e-06, "loss": 0.7649, "step": 4883 }, { "epoch": 0.71, "grad_norm": 5.671555042266846, "learning_rate": 1.8009032691657678e-06, "loss": 0.7402, "step": 4884 }, { "epoch": 0.71, "grad_norm": 5.350963115692139, "learning_rate": 1.8008084805138983e-06, "loss": 0.7318, "step": 4885 }, { "epoch": 0.71, "grad_norm": 5.4953765869140625, "learning_rate": 1.800713671799074e-06, "loss": 0.722, "step": 4886 }, { "epoch": 0.71, "grad_norm": 5.509761333465576, "learning_rate": 1.8006188430236695e-06, "loss": 0.711, "step": 4887 }, { "epoch": 0.71, "grad_norm": 5.854317665100098, "learning_rate": 1.8005239941900606e-06, "loss": 0.5734, "step": 4888 }, { "epoch": 0.71, "grad_norm": 5.4518632888793945, "learning_rate": 1.8004291253006239e-06, "loss": 0.7069, "step": 4889 }, { "epoch": 0.71, "grad_norm": 5.493197917938232, "learning_rate": 1.800334236357736e-06, "loss": 0.62, "step": 4890 }, { "epoch": 0.71, "grad_norm": 6.0835723876953125, "learning_rate": 1.8002393273637742e-06, "loss": 0.7207, "step": 4891 }, { "epoch": 0.71, "grad_norm": 5.729832649230957, "learning_rate": 1.8001443983211162e-06, "loss": 0.6921, "step": 4892 }, { "epoch": 0.71, "grad_norm": 6.125553131103516, "learning_rate": 1.8000494492321404e-06, "loss": 0.8361, "step": 4893 }, { "epoch": 0.71, "grad_norm": 6.215484619140625, "learning_rate": 1.7999544800992256e-06, "loss": 0.6636, "step": 4894 }, { "epoch": 0.71, "grad_norm": 5.857409954071045, "learning_rate": 1.7998594909247513e-06, "loss": 0.6733, "step": 4895 }, { "epoch": 0.71, "grad_norm": 5.435111045837402, "learning_rate": 1.7997644817110968e-06, "loss": 0.6812, "step": 4896 }, { "epoch": 0.71, "grad_norm": 6.074344158172607, "learning_rate": 1.799669452460643e-06, "loss": 0.7212, "step": 4897 }, { "epoch": 0.71, "grad_norm": 5.588922500610352, "learning_rate": 1.7995744031757702e-06, "loss": 0.7379, "step": 4898 }, { "epoch": 0.71, "grad_norm": 5.441688537597656, "learning_rate": 1.79947933385886e-06, "loss": 0.7159, "step": 4899 }, { "epoch": 0.71, "grad_norm": 5.762001991271973, "learning_rate": 1.799384244512294e-06, "loss": 0.7614, "step": 4900 }, { "epoch": 0.71, "grad_norm": 5.086938858032227, "learning_rate": 1.7992891351384547e-06, "loss": 0.7166, "step": 4901 }, { "epoch": 0.71, "grad_norm": 5.813756465911865, "learning_rate": 1.7991940057397247e-06, "loss": 0.7164, "step": 4902 }, { "epoch": 0.71, "grad_norm": 5.478324890136719, "learning_rate": 1.7990988563184877e-06, "loss": 0.6931, "step": 4903 }, { "epoch": 0.71, "grad_norm": 5.902771472930908, "learning_rate": 1.799003686877127e-06, "loss": 0.7743, "step": 4904 }, { "epoch": 0.71, "grad_norm": 5.924462795257568, "learning_rate": 1.7989084974180273e-06, "loss": 0.7394, "step": 4905 }, { "epoch": 0.71, "grad_norm": 5.22786808013916, "learning_rate": 1.7988132879435734e-06, "loss": 0.6777, "step": 4906 }, { "epoch": 0.71, "grad_norm": 5.692117214202881, "learning_rate": 1.7987180584561502e-06, "loss": 0.8274, "step": 4907 }, { "epoch": 0.71, "grad_norm": 5.840412616729736, "learning_rate": 1.7986228089581442e-06, "loss": 0.6565, "step": 4908 }, { "epoch": 0.71, "grad_norm": 5.802181243896484, "learning_rate": 1.798527539451941e-06, "loss": 0.6964, "step": 4909 }, { "epoch": 0.71, "grad_norm": 5.500716209411621, "learning_rate": 1.798432249939928e-06, "loss": 0.7549, "step": 4910 }, { "epoch": 0.71, "grad_norm": 5.4413957595825195, "learning_rate": 1.798336940424492e-06, "loss": 0.6981, "step": 4911 }, { "epoch": 0.71, "grad_norm": 5.948534965515137, "learning_rate": 1.7982416109080214e-06, "loss": 0.6485, "step": 4912 }, { "epoch": 0.71, "grad_norm": 6.015402793884277, "learning_rate": 1.798146261392904e-06, "loss": 0.7896, "step": 4913 }, { "epoch": 0.71, "grad_norm": 5.836892127990723, "learning_rate": 1.798050891881529e-06, "loss": 0.6375, "step": 4914 }, { "epoch": 0.71, "grad_norm": 5.707894325256348, "learning_rate": 1.7979555023762858e-06, "loss": 0.7629, "step": 4915 }, { "epoch": 0.71, "grad_norm": 5.225625991821289, "learning_rate": 1.797860092879564e-06, "loss": 0.706, "step": 4916 }, { "epoch": 0.71, "grad_norm": 5.515149116516113, "learning_rate": 1.7977646633937536e-06, "loss": 0.7492, "step": 4917 }, { "epoch": 0.71, "grad_norm": 5.418254375457764, "learning_rate": 1.797669213921246e-06, "loss": 0.6477, "step": 4918 }, { "epoch": 0.71, "grad_norm": 5.67097282409668, "learning_rate": 1.7975737444644326e-06, "loss": 0.7126, "step": 4919 }, { "epoch": 0.71, "grad_norm": 6.527519702911377, "learning_rate": 1.7974782550257044e-06, "loss": 0.7427, "step": 4920 }, { "epoch": 0.71, "grad_norm": 5.941915988922119, "learning_rate": 1.7973827456074547e-06, "loss": 0.7631, "step": 4921 }, { "epoch": 0.71, "grad_norm": 6.409842014312744, "learning_rate": 1.7972872162120756e-06, "loss": 0.7475, "step": 4922 }, { "epoch": 0.71, "grad_norm": 5.04531192779541, "learning_rate": 1.797191666841961e-06, "loss": 0.6828, "step": 4923 }, { "epoch": 0.71, "grad_norm": 5.0238566398620605, "learning_rate": 1.7970960974995047e-06, "loss": 0.7437, "step": 4924 }, { "epoch": 0.71, "grad_norm": 6.348001956939697, "learning_rate": 1.7970005081871005e-06, "loss": 0.6806, "step": 4925 }, { "epoch": 0.71, "grad_norm": 5.461042404174805, "learning_rate": 1.7969048989071434e-06, "loss": 0.6442, "step": 4926 }, { "epoch": 0.71, "grad_norm": 5.884727478027344, "learning_rate": 1.7968092696620293e-06, "loss": 0.7473, "step": 4927 }, { "epoch": 0.72, "grad_norm": 6.56916618347168, "learning_rate": 1.7967136204541534e-06, "loss": 0.7518, "step": 4928 }, { "epoch": 0.72, "grad_norm": 5.422839164733887, "learning_rate": 1.796617951285912e-06, "loss": 0.6703, "step": 4929 }, { "epoch": 0.72, "grad_norm": 5.221067905426025, "learning_rate": 1.7965222621597027e-06, "loss": 0.7523, "step": 4930 }, { "epoch": 0.72, "grad_norm": 5.440127849578857, "learning_rate": 1.796426553077922e-06, "loss": 0.7378, "step": 4931 }, { "epoch": 0.72, "grad_norm": 6.1826324462890625, "learning_rate": 1.7963308240429681e-06, "loss": 0.7453, "step": 4932 }, { "epoch": 0.72, "grad_norm": 6.024804592132568, "learning_rate": 1.7962350750572397e-06, "loss": 0.681, "step": 4933 }, { "epoch": 0.72, "grad_norm": 6.0182390213012695, "learning_rate": 1.796139306123135e-06, "loss": 0.6419, "step": 4934 }, { "epoch": 0.72, "grad_norm": 5.418554306030273, "learning_rate": 1.7960435172430532e-06, "loss": 0.7834, "step": 4935 }, { "epoch": 0.72, "grad_norm": 5.477713584899902, "learning_rate": 1.7959477084193951e-06, "loss": 0.7378, "step": 4936 }, { "epoch": 0.72, "grad_norm": 6.064150810241699, "learning_rate": 1.79585187965456e-06, "loss": 0.7713, "step": 4937 }, { "epoch": 0.72, "grad_norm": 5.388992786407471, "learning_rate": 1.7957560309509495e-06, "loss": 0.6124, "step": 4938 }, { "epoch": 0.72, "grad_norm": 5.455887794494629, "learning_rate": 1.7956601623109644e-06, "loss": 0.6815, "step": 4939 }, { "epoch": 0.72, "grad_norm": 5.030430793762207, "learning_rate": 1.7955642737370067e-06, "loss": 0.6641, "step": 4940 }, { "epoch": 0.72, "grad_norm": 7.096492767333984, "learning_rate": 1.7954683652314788e-06, "loss": 0.7396, "step": 4941 }, { "epoch": 0.72, "grad_norm": 6.026060104370117, "learning_rate": 1.7953724367967837e-06, "loss": 0.8243, "step": 4942 }, { "epoch": 0.72, "grad_norm": 6.427430152893066, "learning_rate": 1.7952764884353244e-06, "loss": 0.7201, "step": 4943 }, { "epoch": 0.72, "grad_norm": 5.41487979888916, "learning_rate": 1.795180520149505e-06, "loss": 0.6403, "step": 4944 }, { "epoch": 0.72, "grad_norm": 5.63819694519043, "learning_rate": 1.7950845319417296e-06, "loss": 0.6795, "step": 4945 }, { "epoch": 0.72, "grad_norm": 5.126875877380371, "learning_rate": 1.794988523814403e-06, "loss": 0.6935, "step": 4946 }, { "epoch": 0.72, "grad_norm": 5.204626083374023, "learning_rate": 1.7948924957699307e-06, "loss": 0.7271, "step": 4947 }, { "epoch": 0.72, "grad_norm": 5.689911842346191, "learning_rate": 1.7947964478107186e-06, "loss": 0.6504, "step": 4948 }, { "epoch": 0.72, "grad_norm": 5.5950751304626465, "learning_rate": 1.7947003799391731e-06, "loss": 0.6702, "step": 4949 }, { "epoch": 0.72, "grad_norm": 5.891750812530518, "learning_rate": 1.7946042921577004e-06, "loss": 0.7227, "step": 4950 }, { "epoch": 0.72, "grad_norm": 6.039445400238037, "learning_rate": 1.7945081844687086e-06, "loss": 0.6393, "step": 4951 }, { "epoch": 0.72, "grad_norm": 6.2423930168151855, "learning_rate": 1.794412056874605e-06, "loss": 0.7326, "step": 4952 }, { "epoch": 0.72, "grad_norm": 5.56876802444458, "learning_rate": 1.7943159093777982e-06, "loss": 0.6935, "step": 4953 }, { "epoch": 0.72, "grad_norm": 4.892408847808838, "learning_rate": 1.794219741980697e-06, "loss": 0.6625, "step": 4954 }, { "epoch": 0.72, "grad_norm": 5.811718463897705, "learning_rate": 1.7941235546857104e-06, "loss": 0.7422, "step": 4955 }, { "epoch": 0.72, "grad_norm": 5.854502201080322, "learning_rate": 1.7940273474952486e-06, "loss": 0.732, "step": 4956 }, { "epoch": 0.72, "grad_norm": 5.262929916381836, "learning_rate": 1.7939311204117217e-06, "loss": 0.691, "step": 4957 }, { "epoch": 0.72, "grad_norm": 5.306562900543213, "learning_rate": 1.7938348734375407e-06, "loss": 0.7554, "step": 4958 }, { "epoch": 0.72, "grad_norm": 7.656566143035889, "learning_rate": 1.7937386065751165e-06, "loss": 0.7364, "step": 4959 }, { "epoch": 0.72, "grad_norm": 5.97252893447876, "learning_rate": 1.7936423198268614e-06, "loss": 0.727, "step": 4960 }, { "epoch": 0.72, "grad_norm": 5.336321830749512, "learning_rate": 1.7935460131951875e-06, "loss": 0.7232, "step": 4961 }, { "epoch": 0.72, "grad_norm": 6.295712471008301, "learning_rate": 1.7934496866825075e-06, "loss": 0.7541, "step": 4962 }, { "epoch": 0.72, "grad_norm": 5.793018817901611, "learning_rate": 1.7933533402912351e-06, "loss": 0.7522, "step": 4963 }, { "epoch": 0.72, "grad_norm": 5.2998199462890625, "learning_rate": 1.7932569740237837e-06, "loss": 0.7318, "step": 4964 }, { "epoch": 0.72, "grad_norm": 5.839883804321289, "learning_rate": 1.7931605878825678e-06, "loss": 0.6962, "step": 4965 }, { "epoch": 0.72, "grad_norm": 5.5938286781311035, "learning_rate": 1.793064181870002e-06, "loss": 0.6547, "step": 4966 }, { "epoch": 0.72, "grad_norm": 5.383233070373535, "learning_rate": 1.7929677559885015e-06, "loss": 0.7105, "step": 4967 }, { "epoch": 0.72, "grad_norm": 5.84649658203125, "learning_rate": 1.7928713102404825e-06, "loss": 0.6869, "step": 4968 }, { "epoch": 0.72, "grad_norm": 5.616865634918213, "learning_rate": 1.7927748446283613e-06, "loss": 0.7991, "step": 4969 }, { "epoch": 0.72, "grad_norm": 5.4960503578186035, "learning_rate": 1.7926783591545545e-06, "loss": 0.6288, "step": 4970 }, { "epoch": 0.72, "grad_norm": 6.107151508331299, "learning_rate": 1.792581853821479e-06, "loss": 0.852, "step": 4971 }, { "epoch": 0.72, "grad_norm": 5.5785813331604, "learning_rate": 1.7924853286315532e-06, "loss": 0.7182, "step": 4972 }, { "epoch": 0.72, "grad_norm": 5.6524658203125, "learning_rate": 1.792388783587195e-06, "loss": 0.7379, "step": 4973 }, { "epoch": 0.72, "grad_norm": 5.620864391326904, "learning_rate": 1.7922922186908235e-06, "loss": 0.6235, "step": 4974 }, { "epoch": 0.72, "grad_norm": 5.524416923522949, "learning_rate": 1.7921956339448577e-06, "loss": 0.6995, "step": 4975 }, { "epoch": 0.72, "grad_norm": 5.463018894195557, "learning_rate": 1.7920990293517174e-06, "loss": 0.7321, "step": 4976 }, { "epoch": 0.72, "grad_norm": 5.7347588539123535, "learning_rate": 1.7920024049138232e-06, "loss": 0.7071, "step": 4977 }, { "epoch": 0.72, "grad_norm": 5.061582088470459, "learning_rate": 1.7919057606335953e-06, "loss": 0.7033, "step": 4978 }, { "epoch": 0.72, "grad_norm": 5.401504039764404, "learning_rate": 1.7918090965134556e-06, "loss": 0.7625, "step": 4979 }, { "epoch": 0.72, "grad_norm": 5.935153007507324, "learning_rate": 1.7917124125558254e-06, "loss": 0.7195, "step": 4980 }, { "epoch": 0.72, "grad_norm": 5.963876247406006, "learning_rate": 1.7916157087631269e-06, "loss": 0.7816, "step": 4981 }, { "epoch": 0.72, "grad_norm": 5.264752388000488, "learning_rate": 1.7915189851377835e-06, "loss": 0.6496, "step": 4982 }, { "epoch": 0.72, "grad_norm": 5.6046576499938965, "learning_rate": 1.7914222416822177e-06, "loss": 0.6437, "step": 4983 }, { "epoch": 0.72, "grad_norm": 5.8973846435546875, "learning_rate": 1.7913254783988536e-06, "loss": 0.6868, "step": 4984 }, { "epoch": 0.72, "grad_norm": 5.306156158447266, "learning_rate": 1.7912286952901155e-06, "loss": 0.6988, "step": 4985 }, { "epoch": 0.72, "grad_norm": 6.062693119049072, "learning_rate": 1.791131892358428e-06, "loss": 0.7604, "step": 4986 }, { "epoch": 0.72, "grad_norm": 5.91363525390625, "learning_rate": 1.7910350696062161e-06, "loss": 0.683, "step": 4987 }, { "epoch": 0.72, "grad_norm": 5.669348239898682, "learning_rate": 1.790938227035906e-06, "loss": 0.7118, "step": 4988 }, { "epoch": 0.72, "grad_norm": 7.097335338592529, "learning_rate": 1.7908413646499243e-06, "loss": 0.8562, "step": 4989 }, { "epoch": 0.72, "grad_norm": 5.555254936218262, "learning_rate": 1.7907444824506968e-06, "loss": 0.7105, "step": 4990 }, { "epoch": 0.72, "grad_norm": 5.79814338684082, "learning_rate": 1.790647580440651e-06, "loss": 0.7233, "step": 4991 }, { "epoch": 0.72, "grad_norm": 5.541120529174805, "learning_rate": 1.7905506586222145e-06, "loss": 0.72, "step": 4992 }, { "epoch": 0.72, "grad_norm": 5.796229362487793, "learning_rate": 1.7904537169978165e-06, "loss": 0.6762, "step": 4993 }, { "epoch": 0.72, "grad_norm": 5.121877193450928, "learning_rate": 1.7903567555698844e-06, "loss": 0.6394, "step": 4994 }, { "epoch": 0.72, "grad_norm": 6.955945014953613, "learning_rate": 1.790259774340848e-06, "loss": 0.7566, "step": 4995 }, { "epoch": 0.72, "grad_norm": 5.0232672691345215, "learning_rate": 1.790162773313137e-06, "loss": 0.6771, "step": 4996 }, { "epoch": 0.73, "grad_norm": 5.503705024719238, "learning_rate": 1.7900657524891817e-06, "loss": 0.8164, "step": 4997 }, { "epoch": 0.73, "grad_norm": 5.424313545227051, "learning_rate": 1.7899687118714128e-06, "loss": 0.6963, "step": 4998 }, { "epoch": 0.73, "grad_norm": 5.208754539489746, "learning_rate": 1.7898716514622614e-06, "loss": 0.7069, "step": 4999 }, { "epoch": 0.73, "grad_norm": 5.355439186096191, "learning_rate": 1.789774571264159e-06, "loss": 0.6253, "step": 5000 }, { "epoch": 0.73, "grad_norm": 5.456361770629883, "learning_rate": 1.789677471279538e-06, "loss": 0.7162, "step": 5001 }, { "epoch": 0.73, "grad_norm": 5.337622165679932, "learning_rate": 1.789580351510831e-06, "loss": 0.7141, "step": 5002 }, { "epoch": 0.73, "grad_norm": 6.431055545806885, "learning_rate": 1.7894832119604716e-06, "loss": 0.7259, "step": 5003 }, { "epoch": 0.73, "grad_norm": 5.430368423461914, "learning_rate": 1.7893860526308926e-06, "loss": 0.6486, "step": 5004 }, { "epoch": 0.73, "grad_norm": 5.688852787017822, "learning_rate": 1.789288873524529e-06, "loss": 0.7115, "step": 5005 }, { "epoch": 0.73, "grad_norm": 5.7428669929504395, "learning_rate": 1.7891916746438147e-06, "loss": 0.6319, "step": 5006 }, { "epoch": 0.73, "grad_norm": 6.021811485290527, "learning_rate": 1.7890944559911857e-06, "loss": 0.7555, "step": 5007 }, { "epoch": 0.73, "grad_norm": 5.831338405609131, "learning_rate": 1.7889972175690773e-06, "loss": 0.7186, "step": 5008 }, { "epoch": 0.73, "grad_norm": 6.1604323387146, "learning_rate": 1.7888999593799255e-06, "loss": 0.7282, "step": 5009 }, { "epoch": 0.73, "grad_norm": 5.857446193695068, "learning_rate": 1.788802681426167e-06, "loss": 0.7773, "step": 5010 }, { "epoch": 0.73, "grad_norm": 6.112237453460693, "learning_rate": 1.7887053837102389e-06, "loss": 0.7359, "step": 5011 }, { "epoch": 0.73, "grad_norm": 5.711928844451904, "learning_rate": 1.788608066234579e-06, "loss": 0.752, "step": 5012 }, { "epoch": 0.73, "grad_norm": 5.259652614593506, "learning_rate": 1.7885107290016252e-06, "loss": 0.6967, "step": 5013 }, { "epoch": 0.73, "grad_norm": 5.752562046051025, "learning_rate": 1.7884133720138165e-06, "loss": 0.7615, "step": 5014 }, { "epoch": 0.73, "grad_norm": 5.180426120758057, "learning_rate": 1.7883159952735918e-06, "loss": 0.6812, "step": 5015 }, { "epoch": 0.73, "grad_norm": 5.837475776672363, "learning_rate": 1.7882185987833907e-06, "loss": 0.8216, "step": 5016 }, { "epoch": 0.73, "grad_norm": 6.418931484222412, "learning_rate": 1.7881211825456532e-06, "loss": 0.7637, "step": 5017 }, { "epoch": 0.73, "grad_norm": 5.343456745147705, "learning_rate": 1.7880237465628204e-06, "loss": 0.7094, "step": 5018 }, { "epoch": 0.73, "grad_norm": 5.397100925445557, "learning_rate": 1.7879262908373324e-06, "loss": 0.6694, "step": 5019 }, { "epoch": 0.73, "grad_norm": 5.5572829246521, "learning_rate": 1.787828815371632e-06, "loss": 0.7779, "step": 5020 }, { "epoch": 0.73, "grad_norm": 5.498353004455566, "learning_rate": 1.7877313201681603e-06, "loss": 0.7179, "step": 5021 }, { "epoch": 0.73, "grad_norm": 5.105309963226318, "learning_rate": 1.7876338052293604e-06, "loss": 0.6664, "step": 5022 }, { "epoch": 0.73, "grad_norm": 5.404567718505859, "learning_rate": 1.7875362705576756e-06, "loss": 0.64, "step": 5023 }, { "epoch": 0.73, "grad_norm": 5.5501909255981445, "learning_rate": 1.7874387161555488e-06, "loss": 0.7648, "step": 5024 }, { "epoch": 0.73, "grad_norm": 5.499835968017578, "learning_rate": 1.7873411420254247e-06, "loss": 0.6604, "step": 5025 }, { "epoch": 0.73, "grad_norm": 5.993160724639893, "learning_rate": 1.7872435481697473e-06, "loss": 0.6881, "step": 5026 }, { "epoch": 0.73, "grad_norm": 5.0868611335754395, "learning_rate": 1.7871459345909622e-06, "loss": 0.7634, "step": 5027 }, { "epoch": 0.73, "grad_norm": 5.733796119689941, "learning_rate": 1.7870483012915144e-06, "loss": 0.6724, "step": 5028 }, { "epoch": 0.73, "grad_norm": 5.3362717628479, "learning_rate": 1.7869506482738505e-06, "loss": 0.6972, "step": 5029 }, { "epoch": 0.73, "grad_norm": 5.230726718902588, "learning_rate": 1.7868529755404165e-06, "loss": 0.6581, "step": 5030 }, { "epoch": 0.73, "grad_norm": 5.240372180938721, "learning_rate": 1.7867552830936601e-06, "loss": 0.7206, "step": 5031 }, { "epoch": 0.73, "grad_norm": 5.507564067840576, "learning_rate": 1.7866575709360282e-06, "loss": 0.693, "step": 5032 }, { "epoch": 0.73, "grad_norm": 5.473395824432373, "learning_rate": 1.786559839069969e-06, "loss": 0.6953, "step": 5033 }, { "epoch": 0.73, "grad_norm": 5.852997779846191, "learning_rate": 1.786462087497931e-06, "loss": 0.7986, "step": 5034 }, { "epoch": 0.73, "grad_norm": 5.603583812713623, "learning_rate": 1.7863643162223634e-06, "loss": 0.7407, "step": 5035 }, { "epoch": 0.73, "grad_norm": 5.954073429107666, "learning_rate": 1.7862665252457155e-06, "loss": 0.7885, "step": 5036 }, { "epoch": 0.73, "grad_norm": 6.122393608093262, "learning_rate": 1.7861687145704375e-06, "loss": 0.7475, "step": 5037 }, { "epoch": 0.73, "grad_norm": 5.686206340789795, "learning_rate": 1.7860708841989792e-06, "loss": 0.6732, "step": 5038 }, { "epoch": 0.73, "grad_norm": 5.578657150268555, "learning_rate": 1.7859730341337928e-06, "loss": 0.6587, "step": 5039 }, { "epoch": 0.73, "grad_norm": 5.0895891189575195, "learning_rate": 1.7858751643773289e-06, "loss": 0.6333, "step": 5040 }, { "epoch": 0.73, "grad_norm": 5.174490928649902, "learning_rate": 1.7857772749320397e-06, "loss": 0.6463, "step": 5041 }, { "epoch": 0.73, "grad_norm": 5.590078353881836, "learning_rate": 1.7856793658003774e-06, "loss": 0.6956, "step": 5042 }, { "epoch": 0.73, "grad_norm": 5.829287052154541, "learning_rate": 1.7855814369847956e-06, "loss": 0.82, "step": 5043 }, { "epoch": 0.73, "grad_norm": 5.9684157371521, "learning_rate": 1.7854834884877467e-06, "loss": 0.7403, "step": 5044 }, { "epoch": 0.73, "grad_norm": 5.257688045501709, "learning_rate": 1.785385520311686e-06, "loss": 0.6679, "step": 5045 }, { "epoch": 0.73, "grad_norm": 6.127303123474121, "learning_rate": 1.7852875324590666e-06, "loss": 0.6578, "step": 5046 }, { "epoch": 0.73, "grad_norm": 5.0336127281188965, "learning_rate": 1.7851895249323442e-06, "loss": 0.6451, "step": 5047 }, { "epoch": 0.73, "grad_norm": 5.467797756195068, "learning_rate": 1.7850914977339742e-06, "loss": 0.7486, "step": 5048 }, { "epoch": 0.73, "grad_norm": 6.395562171936035, "learning_rate": 1.7849934508664122e-06, "loss": 0.6524, "step": 5049 }, { "epoch": 0.73, "grad_norm": 5.585189342498779, "learning_rate": 1.784895384332115e-06, "loss": 0.717, "step": 5050 }, { "epoch": 0.73, "grad_norm": 5.623912334442139, "learning_rate": 1.7847972981335392e-06, "loss": 0.7384, "step": 5051 }, { "epoch": 0.73, "grad_norm": 5.756518363952637, "learning_rate": 1.784699192273142e-06, "loss": 0.6692, "step": 5052 }, { "epoch": 0.73, "grad_norm": 5.472764492034912, "learning_rate": 1.7846010667533817e-06, "loss": 0.6975, "step": 5053 }, { "epoch": 0.73, "grad_norm": 5.938296318054199, "learning_rate": 1.7845029215767165e-06, "loss": 0.6618, "step": 5054 }, { "epoch": 0.73, "grad_norm": 5.840053558349609, "learning_rate": 1.7844047567456055e-06, "loss": 0.7069, "step": 5055 }, { "epoch": 0.73, "grad_norm": 5.227868556976318, "learning_rate": 1.7843065722625075e-06, "loss": 0.7116, "step": 5056 }, { "epoch": 0.73, "grad_norm": 6.559181213378906, "learning_rate": 1.784208368129883e-06, "loss": 0.749, "step": 5057 }, { "epoch": 0.73, "grad_norm": 6.27410888671875, "learning_rate": 1.7841101443501917e-06, "loss": 0.7515, "step": 5058 }, { "epoch": 0.73, "grad_norm": 6.133334159851074, "learning_rate": 1.784011900925895e-06, "loss": 0.813, "step": 5059 }, { "epoch": 0.73, "grad_norm": 5.941268444061279, "learning_rate": 1.783913637859454e-06, "loss": 0.7425, "step": 5060 }, { "epoch": 0.73, "grad_norm": 5.515396595001221, "learning_rate": 1.7838153551533306e-06, "loss": 0.8007, "step": 5061 }, { "epoch": 0.73, "grad_norm": 6.6595048904418945, "learning_rate": 1.7837170528099869e-06, "loss": 0.7908, "step": 5062 }, { "epoch": 0.73, "grad_norm": 5.399593830108643, "learning_rate": 1.783618730831886e-06, "loss": 0.6685, "step": 5063 }, { "epoch": 0.73, "grad_norm": 6.309013843536377, "learning_rate": 1.783520389221491e-06, "loss": 0.7005, "step": 5064 }, { "epoch": 0.73, "grad_norm": 4.92604398727417, "learning_rate": 1.7834220279812656e-06, "loss": 0.7004, "step": 5065 }, { "epoch": 0.74, "grad_norm": 5.4718427658081055, "learning_rate": 1.7833236471136744e-06, "loss": 0.7308, "step": 5066 }, { "epoch": 0.74, "grad_norm": 5.062427997589111, "learning_rate": 1.7832252466211818e-06, "loss": 0.6612, "step": 5067 }, { "epoch": 0.74, "grad_norm": 5.409833908081055, "learning_rate": 1.7831268265062534e-06, "loss": 0.6914, "step": 5068 }, { "epoch": 0.74, "grad_norm": 5.702186584472656, "learning_rate": 1.7830283867713548e-06, "loss": 0.7519, "step": 5069 }, { "epoch": 0.74, "grad_norm": 5.30949068069458, "learning_rate": 1.782929927418952e-06, "loss": 0.6899, "step": 5070 }, { "epoch": 0.74, "grad_norm": 5.456538677215576, "learning_rate": 1.782831448451512e-06, "loss": 0.6803, "step": 5071 }, { "epoch": 0.74, "grad_norm": 6.2699294090271, "learning_rate": 1.7827329498715025e-06, "loss": 0.7045, "step": 5072 }, { "epoch": 0.74, "grad_norm": 5.9246039390563965, "learning_rate": 1.7826344316813903e-06, "loss": 0.6746, "step": 5073 }, { "epoch": 0.74, "grad_norm": 5.8780012130737305, "learning_rate": 1.7825358938836446e-06, "loss": 0.694, "step": 5074 }, { "epoch": 0.74, "grad_norm": 5.511941432952881, "learning_rate": 1.7824373364807331e-06, "loss": 0.6554, "step": 5075 }, { "epoch": 0.74, "grad_norm": 6.000217437744141, "learning_rate": 1.7823387594751255e-06, "loss": 0.7273, "step": 5076 }, { "epoch": 0.74, "grad_norm": 6.284463882446289, "learning_rate": 1.7822401628692916e-06, "loss": 0.7948, "step": 5077 }, { "epoch": 0.74, "grad_norm": 6.337591648101807, "learning_rate": 1.7821415466657012e-06, "loss": 0.6421, "step": 5078 }, { "epoch": 0.74, "grad_norm": 5.043125152587891, "learning_rate": 1.7820429108668254e-06, "loss": 0.7486, "step": 5079 }, { "epoch": 0.74, "grad_norm": 5.339876651763916, "learning_rate": 1.781944255475135e-06, "loss": 0.6934, "step": 5080 }, { "epoch": 0.74, "grad_norm": 6.225269317626953, "learning_rate": 1.7818455804931021e-06, "loss": 0.7603, "step": 5081 }, { "epoch": 0.74, "grad_norm": 5.8576436042785645, "learning_rate": 1.7817468859231983e-06, "loss": 0.6962, "step": 5082 }, { "epoch": 0.74, "grad_norm": 5.843503952026367, "learning_rate": 1.7816481717678963e-06, "loss": 0.7741, "step": 5083 }, { "epoch": 0.74, "grad_norm": 5.152056694030762, "learning_rate": 1.7815494380296699e-06, "loss": 0.6903, "step": 5084 }, { "epoch": 0.74, "grad_norm": 5.738770961761475, "learning_rate": 1.7814506847109919e-06, "loss": 0.6878, "step": 5085 }, { "epoch": 0.74, "grad_norm": 5.224233150482178, "learning_rate": 1.7813519118143368e-06, "loss": 0.6825, "step": 5086 }, { "epoch": 0.74, "grad_norm": 6.338082313537598, "learning_rate": 1.781253119342179e-06, "loss": 0.607, "step": 5087 }, { "epoch": 0.74, "grad_norm": 5.430087089538574, "learning_rate": 1.7811543072969936e-06, "loss": 0.7469, "step": 5088 }, { "epoch": 0.74, "grad_norm": 5.430316925048828, "learning_rate": 1.7810554756812567e-06, "loss": 0.6805, "step": 5089 }, { "epoch": 0.74, "grad_norm": 5.240028381347656, "learning_rate": 1.7809566244974436e-06, "loss": 0.634, "step": 5090 }, { "epoch": 0.74, "grad_norm": 6.641418933868408, "learning_rate": 1.7808577537480312e-06, "loss": 0.8189, "step": 5091 }, { "epoch": 0.74, "grad_norm": 5.788208961486816, "learning_rate": 1.7807588634354964e-06, "loss": 0.7044, "step": 5092 }, { "epoch": 0.74, "grad_norm": 5.807234287261963, "learning_rate": 1.7806599535623173e-06, "loss": 0.6562, "step": 5093 }, { "epoch": 0.74, "grad_norm": 6.020280361175537, "learning_rate": 1.7805610241309714e-06, "loss": 0.7651, "step": 5094 }, { "epoch": 0.74, "grad_norm": 6.264505386352539, "learning_rate": 1.7804620751439373e-06, "loss": 0.7634, "step": 5095 }, { "epoch": 0.74, "grad_norm": 6.497669696807861, "learning_rate": 1.7803631066036936e-06, "loss": 0.6882, "step": 5096 }, { "epoch": 0.74, "grad_norm": 5.924059867858887, "learning_rate": 1.7802641185127207e-06, "loss": 0.678, "step": 5097 }, { "epoch": 0.74, "grad_norm": 5.950051784515381, "learning_rate": 1.780165110873498e-06, "loss": 0.6541, "step": 5098 }, { "epoch": 0.74, "grad_norm": 6.062461853027344, "learning_rate": 1.7800660836885059e-06, "loss": 0.6944, "step": 5099 }, { "epoch": 0.74, "grad_norm": 5.983416557312012, "learning_rate": 1.7799670369602257e-06, "loss": 0.7213, "step": 5100 }, { "epoch": 0.74, "grad_norm": 5.811056137084961, "learning_rate": 1.7798679706911388e-06, "loss": 0.7546, "step": 5101 }, { "epoch": 0.74, "grad_norm": 5.345924377441406, "learning_rate": 1.779768884883727e-06, "loss": 0.75, "step": 5102 }, { "epoch": 0.74, "grad_norm": 5.924196243286133, "learning_rate": 1.7796697795404727e-06, "loss": 0.6545, "step": 5103 }, { "epoch": 0.74, "grad_norm": 5.530303001403809, "learning_rate": 1.7795706546638586e-06, "loss": 0.7725, "step": 5104 }, { "epoch": 0.74, "grad_norm": 5.411242961883545, "learning_rate": 1.7794715102563692e-06, "loss": 0.6554, "step": 5105 }, { "epoch": 0.74, "grad_norm": 5.076975345611572, "learning_rate": 1.779372346320487e-06, "loss": 0.6492, "step": 5106 }, { "epoch": 0.74, "grad_norm": 5.346932888031006, "learning_rate": 1.779273162858697e-06, "loss": 0.7096, "step": 5107 }, { "epoch": 0.74, "grad_norm": 4.855838775634766, "learning_rate": 1.7791739598734842e-06, "loss": 0.6682, "step": 5108 }, { "epoch": 0.74, "grad_norm": 5.189286708831787, "learning_rate": 1.779074737367334e-06, "loss": 0.6414, "step": 5109 }, { "epoch": 0.74, "grad_norm": 5.291107177734375, "learning_rate": 1.778975495342732e-06, "loss": 0.7902, "step": 5110 }, { "epoch": 0.74, "grad_norm": 5.2560906410217285, "learning_rate": 1.778876233802165e-06, "loss": 0.7125, "step": 5111 }, { "epoch": 0.74, "grad_norm": 5.368105411529541, "learning_rate": 1.778776952748119e-06, "loss": 0.6735, "step": 5112 }, { "epoch": 0.74, "grad_norm": 5.397162437438965, "learning_rate": 1.7786776521830822e-06, "loss": 0.7043, "step": 5113 }, { "epoch": 0.74, "grad_norm": 6.275768280029297, "learning_rate": 1.7785783321095417e-06, "loss": 0.7356, "step": 5114 }, { "epoch": 0.74, "grad_norm": 5.137767314910889, "learning_rate": 1.7784789925299864e-06, "loss": 0.7721, "step": 5115 }, { "epoch": 0.74, "grad_norm": 5.72410249710083, "learning_rate": 1.7783796334469049e-06, "loss": 0.6886, "step": 5116 }, { "epoch": 0.74, "grad_norm": 5.481106281280518, "learning_rate": 1.778280254862786e-06, "loss": 0.6932, "step": 5117 }, { "epoch": 0.74, "grad_norm": 5.778166770935059, "learning_rate": 1.7781808567801205e-06, "loss": 0.8169, "step": 5118 }, { "epoch": 0.74, "grad_norm": 5.334293842315674, "learning_rate": 1.7780814392013974e-06, "loss": 0.7296, "step": 5119 }, { "epoch": 0.74, "grad_norm": 5.533257961273193, "learning_rate": 1.7779820021291085e-06, "loss": 0.7066, "step": 5120 }, { "epoch": 0.74, "grad_norm": 5.2146782875061035, "learning_rate": 1.7778825455657444e-06, "loss": 0.7023, "step": 5121 }, { "epoch": 0.74, "grad_norm": 5.363673686981201, "learning_rate": 1.777783069513797e-06, "loss": 0.6922, "step": 5122 }, { "epoch": 0.74, "grad_norm": 5.676177978515625, "learning_rate": 1.777683573975759e-06, "loss": 0.7365, "step": 5123 }, { "epoch": 0.74, "grad_norm": 5.428234100341797, "learning_rate": 1.7775840589541224e-06, "loss": 0.7439, "step": 5124 }, { "epoch": 0.74, "grad_norm": 5.717024803161621, "learning_rate": 1.7774845244513808e-06, "loss": 0.7326, "step": 5125 }, { "epoch": 0.74, "grad_norm": 5.597772121429443, "learning_rate": 1.7773849704700275e-06, "loss": 0.7434, "step": 5126 }, { "epoch": 0.74, "grad_norm": 5.235649585723877, "learning_rate": 1.7772853970125568e-06, "loss": 0.6876, "step": 5127 }, { "epoch": 0.74, "grad_norm": 5.69899320602417, "learning_rate": 1.7771858040814635e-06, "loss": 0.7272, "step": 5128 }, { "epoch": 0.74, "grad_norm": 5.8105902671813965, "learning_rate": 1.7770861916792428e-06, "loss": 0.7532, "step": 5129 }, { "epoch": 0.74, "grad_norm": 5.229121685028076, "learning_rate": 1.7769865598083902e-06, "loss": 0.7085, "step": 5130 }, { "epoch": 0.74, "grad_norm": 5.397160530090332, "learning_rate": 1.7768869084714018e-06, "loss": 0.688, "step": 5131 }, { "epoch": 0.74, "grad_norm": 6.057365417480469, "learning_rate": 1.7767872376707741e-06, "loss": 0.7777, "step": 5132 }, { "epoch": 0.74, "grad_norm": 5.000528335571289, "learning_rate": 1.7766875474090042e-06, "loss": 0.6336, "step": 5133 }, { "epoch": 0.74, "grad_norm": 5.712275505065918, "learning_rate": 1.77658783768859e-06, "loss": 0.7065, "step": 5134 }, { "epoch": 0.75, "grad_norm": 5.235197067260742, "learning_rate": 1.7764881085120296e-06, "loss": 0.7089, "step": 5135 }, { "epoch": 0.75, "grad_norm": 5.3145623207092285, "learning_rate": 1.7763883598818209e-06, "loss": 0.7121, "step": 5136 }, { "epoch": 0.75, "grad_norm": 5.406052589416504, "learning_rate": 1.7762885918004633e-06, "loss": 0.6685, "step": 5137 }, { "epoch": 0.75, "grad_norm": 5.6122870445251465, "learning_rate": 1.7761888042704566e-06, "loss": 0.7184, "step": 5138 }, { "epoch": 0.75, "grad_norm": 5.524527072906494, "learning_rate": 1.7760889972943006e-06, "loss": 0.6544, "step": 5139 }, { "epoch": 0.75, "grad_norm": 5.357944011688232, "learning_rate": 1.7759891708744956e-06, "loss": 0.6567, "step": 5140 }, { "epoch": 0.75, "grad_norm": 6.484145164489746, "learning_rate": 1.775889325013543e-06, "loss": 0.8205, "step": 5141 }, { "epoch": 0.75, "grad_norm": 5.935305595397949, "learning_rate": 1.7757894597139437e-06, "loss": 0.702, "step": 5142 }, { "epoch": 0.75, "grad_norm": 5.751850128173828, "learning_rate": 1.7756895749782003e-06, "loss": 0.7331, "step": 5143 }, { "epoch": 0.75, "grad_norm": 5.420998573303223, "learning_rate": 1.7755896708088148e-06, "loss": 0.708, "step": 5144 }, { "epoch": 0.75, "grad_norm": 6.684930324554443, "learning_rate": 1.7754897472082904e-06, "loss": 0.8132, "step": 5145 }, { "epoch": 0.75, "grad_norm": 6.282158374786377, "learning_rate": 1.7753898041791304e-06, "loss": 0.8537, "step": 5146 }, { "epoch": 0.75, "grad_norm": 5.6893744468688965, "learning_rate": 1.7752898417238389e-06, "loss": 0.7014, "step": 5147 }, { "epoch": 0.75, "grad_norm": 5.4581146240234375, "learning_rate": 1.7751898598449197e-06, "loss": 0.7187, "step": 5148 }, { "epoch": 0.75, "grad_norm": 5.81096887588501, "learning_rate": 1.7750898585448785e-06, "loss": 0.6837, "step": 5149 }, { "epoch": 0.75, "grad_norm": 5.295176982879639, "learning_rate": 1.7749898378262203e-06, "loss": 0.6781, "step": 5150 }, { "epoch": 0.75, "grad_norm": 5.8904337882995605, "learning_rate": 1.7748897976914507e-06, "loss": 0.8058, "step": 5151 }, { "epoch": 0.75, "grad_norm": 5.691622734069824, "learning_rate": 1.7747897381430765e-06, "loss": 0.7633, "step": 5152 }, { "epoch": 0.75, "grad_norm": 6.039175033569336, "learning_rate": 1.7746896591836041e-06, "loss": 0.6905, "step": 5153 }, { "epoch": 0.75, "grad_norm": 5.889928340911865, "learning_rate": 1.7745895608155413e-06, "loss": 0.7381, "step": 5154 }, { "epoch": 0.75, "grad_norm": 5.059907913208008, "learning_rate": 1.7744894430413954e-06, "loss": 0.7335, "step": 5155 }, { "epoch": 0.75, "grad_norm": 5.078700065612793, "learning_rate": 1.774389305863675e-06, "loss": 0.6276, "step": 5156 }, { "epoch": 0.75, "grad_norm": 4.712657928466797, "learning_rate": 1.7742891492848885e-06, "loss": 0.6751, "step": 5157 }, { "epoch": 0.75, "grad_norm": 5.298044681549072, "learning_rate": 1.7741889733075456e-06, "loss": 0.6931, "step": 5158 }, { "epoch": 0.75, "grad_norm": 5.260379314422607, "learning_rate": 1.7740887779341561e-06, "loss": 0.7496, "step": 5159 }, { "epoch": 0.75, "grad_norm": 5.044879913330078, "learning_rate": 1.7739885631672301e-06, "loss": 0.6456, "step": 5160 }, { "epoch": 0.75, "grad_norm": 5.5367326736450195, "learning_rate": 1.7738883290092782e-06, "loss": 0.7175, "step": 5161 }, { "epoch": 0.75, "grad_norm": 5.82496452331543, "learning_rate": 1.7737880754628113e-06, "loss": 0.7035, "step": 5162 }, { "epoch": 0.75, "grad_norm": 5.295177459716797, "learning_rate": 1.7736878025303418e-06, "loss": 0.6319, "step": 5163 }, { "epoch": 0.75, "grad_norm": 5.765507698059082, "learning_rate": 1.7735875102143813e-06, "loss": 0.7115, "step": 5164 }, { "epoch": 0.75, "grad_norm": 5.468744277954102, "learning_rate": 1.773487198517443e-06, "loss": 0.611, "step": 5165 }, { "epoch": 0.75, "grad_norm": 5.510818958282471, "learning_rate": 1.7733868674420393e-06, "loss": 0.7084, "step": 5166 }, { "epoch": 0.75, "grad_norm": 4.9186248779296875, "learning_rate": 1.7732865169906845e-06, "loss": 0.6527, "step": 5167 }, { "epoch": 0.75, "grad_norm": 6.315671920776367, "learning_rate": 1.7731861471658927e-06, "loss": 0.7272, "step": 5168 }, { "epoch": 0.75, "grad_norm": 6.520303249359131, "learning_rate": 1.773085757970178e-06, "loss": 0.8387, "step": 5169 }, { "epoch": 0.75, "grad_norm": 5.506286144256592, "learning_rate": 1.7729853494060559e-06, "loss": 0.666, "step": 5170 }, { "epoch": 0.75, "grad_norm": 5.683443546295166, "learning_rate": 1.7728849214760415e-06, "loss": 0.7028, "step": 5171 }, { "epoch": 0.75, "grad_norm": 5.17422342300415, "learning_rate": 1.7727844741826514e-06, "loss": 0.7543, "step": 5172 }, { "epoch": 0.75, "grad_norm": 5.326236248016357, "learning_rate": 1.772684007528402e-06, "loss": 0.6616, "step": 5173 }, { "epoch": 0.75, "grad_norm": 5.707006454467773, "learning_rate": 1.7725835215158103e-06, "loss": 0.7146, "step": 5174 }, { "epoch": 0.75, "grad_norm": 5.266611576080322, "learning_rate": 1.7724830161473939e-06, "loss": 0.6628, "step": 5175 }, { "epoch": 0.75, "grad_norm": 6.303139686584473, "learning_rate": 1.7723824914256701e-06, "loss": 0.7494, "step": 5176 }, { "epoch": 0.75, "grad_norm": 5.6455488204956055, "learning_rate": 1.7722819473531585e-06, "loss": 0.8053, "step": 5177 }, { "epoch": 0.75, "grad_norm": 5.5632219314575195, "learning_rate": 1.7721813839323775e-06, "loss": 0.7502, "step": 5178 }, { "epoch": 0.75, "grad_norm": 5.511255741119385, "learning_rate": 1.7720808011658464e-06, "loss": 0.7402, "step": 5179 }, { "epoch": 0.75, "grad_norm": 5.976682662963867, "learning_rate": 1.7719801990560854e-06, "loss": 0.7697, "step": 5180 }, { "epoch": 0.75, "grad_norm": 5.833791255950928, "learning_rate": 1.771879577605615e-06, "loss": 0.695, "step": 5181 }, { "epoch": 0.75, "grad_norm": 5.9684906005859375, "learning_rate": 1.7717789368169558e-06, "loss": 0.7243, "step": 5182 }, { "epoch": 0.75, "grad_norm": 5.0720367431640625, "learning_rate": 1.7716782766926293e-06, "loss": 0.6457, "step": 5183 }, { "epoch": 0.75, "grad_norm": 5.20128870010376, "learning_rate": 1.7715775972351576e-06, "loss": 0.6536, "step": 5184 }, { "epoch": 0.75, "grad_norm": 5.188466548919678, "learning_rate": 1.7714768984470627e-06, "loss": 0.7275, "step": 5185 }, { "epoch": 0.75, "grad_norm": 5.095653057098389, "learning_rate": 1.7713761803308677e-06, "loss": 0.6529, "step": 5186 }, { "epoch": 0.75, "grad_norm": 6.096716403961182, "learning_rate": 1.7712754428890961e-06, "loss": 0.7873, "step": 5187 }, { "epoch": 0.75, "grad_norm": 6.228572368621826, "learning_rate": 1.7711746861242711e-06, "loss": 0.7354, "step": 5188 }, { "epoch": 0.75, "grad_norm": 6.335082054138184, "learning_rate": 1.7710739100389174e-06, "loss": 0.7234, "step": 5189 }, { "epoch": 0.75, "grad_norm": 5.407289028167725, "learning_rate": 1.77097311463556e-06, "loss": 0.6892, "step": 5190 }, { "epoch": 0.75, "grad_norm": 6.458251476287842, "learning_rate": 1.770872299916724e-06, "loss": 0.6577, "step": 5191 }, { "epoch": 0.75, "grad_norm": 6.1499834060668945, "learning_rate": 1.7707714658849345e-06, "loss": 0.7339, "step": 5192 }, { "epoch": 0.75, "grad_norm": 6.127452850341797, "learning_rate": 1.7706706125427187e-06, "loss": 0.6984, "step": 5193 }, { "epoch": 0.75, "grad_norm": 6.234259605407715, "learning_rate": 1.7705697398926027e-06, "loss": 0.714, "step": 5194 }, { "epoch": 0.75, "grad_norm": 5.845696926116943, "learning_rate": 1.770468847937114e-06, "loss": 0.7632, "step": 5195 }, { "epoch": 0.75, "grad_norm": 5.515535831451416, "learning_rate": 1.7703679366787804e-06, "loss": 0.6262, "step": 5196 }, { "epoch": 0.75, "grad_norm": 5.693085670471191, "learning_rate": 1.7702670061201298e-06, "loss": 0.6947, "step": 5197 }, { "epoch": 0.75, "grad_norm": 4.91819953918457, "learning_rate": 1.7701660562636906e-06, "loss": 0.7666, "step": 5198 }, { "epoch": 0.75, "grad_norm": 5.593300819396973, "learning_rate": 1.7700650871119925e-06, "loss": 0.7159, "step": 5199 }, { "epoch": 0.75, "grad_norm": 6.057154655456543, "learning_rate": 1.769964098667565e-06, "loss": 0.784, "step": 5200 }, { "epoch": 0.75, "grad_norm": 5.673327445983887, "learning_rate": 1.769863090932938e-06, "loss": 0.649, "step": 5201 }, { "epoch": 0.75, "grad_norm": 5.999256610870361, "learning_rate": 1.7697620639106418e-06, "loss": 0.8254, "step": 5202 }, { "epoch": 0.75, "grad_norm": 5.725100040435791, "learning_rate": 1.769661017603208e-06, "loss": 0.7083, "step": 5203 }, { "epoch": 0.76, "grad_norm": 5.783751010894775, "learning_rate": 1.7695599520131681e-06, "loss": 0.6962, "step": 5204 }, { "epoch": 0.76, "grad_norm": 5.843574047088623, "learning_rate": 1.7694588671430538e-06, "loss": 0.7144, "step": 5205 }, { "epoch": 0.76, "grad_norm": 4.806434154510498, "learning_rate": 1.7693577629953982e-06, "loss": 0.697, "step": 5206 }, { "epoch": 0.76, "grad_norm": 5.656452178955078, "learning_rate": 1.7692566395727337e-06, "loss": 0.669, "step": 5207 }, { "epoch": 0.76, "grad_norm": 5.5650553703308105, "learning_rate": 1.769155496877594e-06, "loss": 0.6915, "step": 5208 }, { "epoch": 0.76, "grad_norm": 5.234495162963867, "learning_rate": 1.769054334912513e-06, "loss": 0.6589, "step": 5209 }, { "epoch": 0.76, "grad_norm": 5.414425849914551, "learning_rate": 1.7689531536800253e-06, "loss": 0.7787, "step": 5210 }, { "epoch": 0.76, "grad_norm": 5.313234329223633, "learning_rate": 1.7688519531826658e-06, "loss": 0.6689, "step": 5211 }, { "epoch": 0.76, "grad_norm": 5.643286228179932, "learning_rate": 1.7687507334229697e-06, "loss": 0.7171, "step": 5212 }, { "epoch": 0.76, "grad_norm": 6.080776691436768, "learning_rate": 1.7686494944034732e-06, "loss": 0.768, "step": 5213 }, { "epoch": 0.76, "grad_norm": 5.278109073638916, "learning_rate": 1.7685482361267125e-06, "loss": 0.7818, "step": 5214 }, { "epoch": 0.76, "grad_norm": 5.145295143127441, "learning_rate": 1.7684469585952246e-06, "loss": 0.6796, "step": 5215 }, { "epoch": 0.76, "grad_norm": 5.27147102355957, "learning_rate": 1.7683456618115467e-06, "loss": 0.7176, "step": 5216 }, { "epoch": 0.76, "grad_norm": 5.2726263999938965, "learning_rate": 1.7682443457782165e-06, "loss": 0.7154, "step": 5217 }, { "epoch": 0.76, "grad_norm": 6.053574562072754, "learning_rate": 1.7681430104977727e-06, "loss": 0.6876, "step": 5218 }, { "epoch": 0.76, "grad_norm": 4.962067127227783, "learning_rate": 1.7680416559727539e-06, "loss": 0.648, "step": 5219 }, { "epoch": 0.76, "grad_norm": 6.3731842041015625, "learning_rate": 1.767940282205699e-06, "loss": 0.7024, "step": 5220 }, { "epoch": 0.76, "grad_norm": 5.313854694366455, "learning_rate": 1.7678388891991484e-06, "loss": 0.7505, "step": 5221 }, { "epoch": 0.76, "grad_norm": 5.613075256347656, "learning_rate": 1.7677374769556419e-06, "loss": 0.7951, "step": 5222 }, { "epoch": 0.76, "grad_norm": 5.634467601776123, "learning_rate": 1.7676360454777207e-06, "loss": 0.7176, "step": 5223 }, { "epoch": 0.76, "grad_norm": 5.515353202819824, "learning_rate": 1.7675345947679253e-06, "loss": 0.7376, "step": 5224 }, { "epoch": 0.76, "grad_norm": 5.484528064727783, "learning_rate": 1.767433124828798e-06, "loss": 0.74, "step": 5225 }, { "epoch": 0.76, "grad_norm": 5.2817769050598145, "learning_rate": 1.7673316356628805e-06, "loss": 0.727, "step": 5226 }, { "epoch": 0.76, "grad_norm": 6.277569770812988, "learning_rate": 1.7672301272727155e-06, "loss": 0.7144, "step": 5227 }, { "epoch": 0.76, "grad_norm": 5.404320240020752, "learning_rate": 1.7671285996608465e-06, "loss": 0.6722, "step": 5228 }, { "epoch": 0.76, "grad_norm": 5.624408721923828, "learning_rate": 1.767027052829817e-06, "loss": 0.7119, "step": 5229 }, { "epoch": 0.76, "grad_norm": 5.2739691734313965, "learning_rate": 1.7669254867821706e-06, "loss": 0.7024, "step": 5230 }, { "epoch": 0.76, "grad_norm": 5.046714782714844, "learning_rate": 1.7668239015204526e-06, "loss": 0.7258, "step": 5231 }, { "epoch": 0.76, "grad_norm": 5.902163982391357, "learning_rate": 1.7667222970472075e-06, "loss": 0.7452, "step": 5232 }, { "epoch": 0.76, "grad_norm": 5.492377758026123, "learning_rate": 1.7666206733649813e-06, "loss": 0.7238, "step": 5233 }, { "epoch": 0.76, "grad_norm": 5.721970081329346, "learning_rate": 1.7665190304763196e-06, "loss": 0.7233, "step": 5234 }, { "epoch": 0.76, "grad_norm": 5.735313415527344, "learning_rate": 1.7664173683837689e-06, "loss": 0.7076, "step": 5235 }, { "epoch": 0.76, "grad_norm": 6.1200480461120605, "learning_rate": 1.7663156870898765e-06, "loss": 0.6987, "step": 5236 }, { "epoch": 0.76, "grad_norm": 6.04601526260376, "learning_rate": 1.7662139865971896e-06, "loss": 0.7724, "step": 5237 }, { "epoch": 0.76, "grad_norm": 5.789757251739502, "learning_rate": 1.7661122669082564e-06, "loss": 0.7378, "step": 5238 }, { "epoch": 0.76, "grad_norm": 5.879403591156006, "learning_rate": 1.766010528025625e-06, "loss": 0.6802, "step": 5239 }, { "epoch": 0.76, "grad_norm": 5.972212314605713, "learning_rate": 1.7659087699518444e-06, "loss": 0.7661, "step": 5240 }, { "epoch": 0.76, "grad_norm": 6.071020126342773, "learning_rate": 1.7658069926894643e-06, "loss": 0.7397, "step": 5241 }, { "epoch": 0.76, "grad_norm": 5.601783275604248, "learning_rate": 1.7657051962410341e-06, "loss": 0.7191, "step": 5242 }, { "epoch": 0.76, "grad_norm": 5.792266845703125, "learning_rate": 1.7656033806091046e-06, "loss": 0.7999, "step": 5243 }, { "epoch": 0.76, "grad_norm": 5.599869251251221, "learning_rate": 1.765501545796226e-06, "loss": 0.7436, "step": 5244 }, { "epoch": 0.76, "grad_norm": 4.92429780960083, "learning_rate": 1.7653996918049503e-06, "loss": 0.6724, "step": 5245 }, { "epoch": 0.76, "grad_norm": 5.836276531219482, "learning_rate": 1.765297818637829e-06, "loss": 0.7634, "step": 5246 }, { "epoch": 0.76, "grad_norm": 5.648105144500732, "learning_rate": 1.7651959262974141e-06, "loss": 0.7202, "step": 5247 }, { "epoch": 0.76, "grad_norm": 5.342816352844238, "learning_rate": 1.7650940147862585e-06, "loss": 0.7065, "step": 5248 }, { "epoch": 0.76, "grad_norm": 5.750057220458984, "learning_rate": 1.7649920841069154e-06, "loss": 0.6835, "step": 5249 }, { "epoch": 0.76, "grad_norm": 5.209587097167969, "learning_rate": 1.764890134261939e-06, "loss": 0.7755, "step": 5250 }, { "epoch": 0.76, "grad_norm": 5.416133403778076, "learning_rate": 1.764788165253883e-06, "loss": 0.7825, "step": 5251 }, { "epoch": 0.76, "grad_norm": 5.185415744781494, "learning_rate": 1.764686177085302e-06, "loss": 0.6208, "step": 5252 }, { "epoch": 0.76, "grad_norm": 5.388807773590088, "learning_rate": 1.7645841697587517e-06, "loss": 0.6438, "step": 5253 }, { "epoch": 0.76, "grad_norm": 6.025538921356201, "learning_rate": 1.764482143276787e-06, "loss": 0.7738, "step": 5254 }, { "epoch": 0.76, "grad_norm": 5.746029376983643, "learning_rate": 1.7643800976419643e-06, "loss": 0.7516, "step": 5255 }, { "epoch": 0.76, "grad_norm": 6.185274600982666, "learning_rate": 1.7642780328568405e-06, "loss": 0.7444, "step": 5256 }, { "epoch": 0.76, "grad_norm": 5.799735069274902, "learning_rate": 1.7641759489239725e-06, "loss": 0.7967, "step": 5257 }, { "epoch": 0.76, "grad_norm": 5.037546634674072, "learning_rate": 1.7640738458459175e-06, "loss": 0.6085, "step": 5258 }, { "epoch": 0.76, "grad_norm": 6.071805000305176, "learning_rate": 1.7639717236252335e-06, "loss": 0.8207, "step": 5259 }, { "epoch": 0.76, "grad_norm": 5.559423446655273, "learning_rate": 1.7638695822644798e-06, "loss": 0.695, "step": 5260 }, { "epoch": 0.76, "grad_norm": 5.952159404754639, "learning_rate": 1.7637674217662148e-06, "loss": 0.7221, "step": 5261 }, { "epoch": 0.76, "grad_norm": 5.2631731033325195, "learning_rate": 1.7636652421329976e-06, "loss": 0.6741, "step": 5262 }, { "epoch": 0.76, "grad_norm": 5.605224132537842, "learning_rate": 1.7635630433673891e-06, "loss": 0.6881, "step": 5263 }, { "epoch": 0.76, "grad_norm": 5.431325435638428, "learning_rate": 1.7634608254719492e-06, "loss": 0.6913, "step": 5264 }, { "epoch": 0.76, "grad_norm": 5.373641014099121, "learning_rate": 1.7633585884492384e-06, "loss": 0.6517, "step": 5265 }, { "epoch": 0.76, "grad_norm": 5.757443428039551, "learning_rate": 1.7632563323018187e-06, "loss": 0.7157, "step": 5266 }, { "epoch": 0.76, "grad_norm": 5.491476535797119, "learning_rate": 1.7631540570322518e-06, "loss": 0.7703, "step": 5267 }, { "epoch": 0.76, "grad_norm": 5.515719890594482, "learning_rate": 1.7630517626431e-06, "loss": 0.7441, "step": 5268 }, { "epoch": 0.76, "grad_norm": 5.990365028381348, "learning_rate": 1.762949449136926e-06, "loss": 0.7173, "step": 5269 }, { "epoch": 0.76, "grad_norm": 5.74639892578125, "learning_rate": 1.7628471165162934e-06, "loss": 0.7326, "step": 5270 }, { "epoch": 0.76, "grad_norm": 5.38437032699585, "learning_rate": 1.7627447647837656e-06, "loss": 0.6947, "step": 5271 }, { "epoch": 0.76, "grad_norm": 5.710860729217529, "learning_rate": 1.7626423939419072e-06, "loss": 0.6932, "step": 5272 }, { "epoch": 0.77, "grad_norm": 5.108152866363525, "learning_rate": 1.762540003993283e-06, "loss": 0.7217, "step": 5273 }, { "epoch": 0.77, "grad_norm": 5.9291157722473145, "learning_rate": 1.7624375949404576e-06, "loss": 0.652, "step": 5274 }, { "epoch": 0.77, "grad_norm": 5.245149612426758, "learning_rate": 1.7623351667859972e-06, "loss": 0.6767, "step": 5275 }, { "epoch": 0.77, "grad_norm": 5.412734031677246, "learning_rate": 1.7622327195324678e-06, "loss": 0.7305, "step": 5276 }, { "epoch": 0.77, "grad_norm": 4.844327449798584, "learning_rate": 1.7621302531824363e-06, "loss": 0.6825, "step": 5277 }, { "epoch": 0.77, "grad_norm": 4.7749738693237305, "learning_rate": 1.7620277677384694e-06, "loss": 0.6376, "step": 5278 }, { "epoch": 0.77, "grad_norm": 6.477874755859375, "learning_rate": 1.761925263203135e-06, "loss": 0.8309, "step": 5279 }, { "epoch": 0.77, "grad_norm": 5.20645809173584, "learning_rate": 1.7618227395790012e-06, "loss": 0.6491, "step": 5280 }, { "epoch": 0.77, "grad_norm": 5.890506267547607, "learning_rate": 1.7617201968686367e-06, "loss": 0.6704, "step": 5281 }, { "epoch": 0.77, "grad_norm": 5.417348384857178, "learning_rate": 1.76161763507461e-06, "loss": 0.7012, "step": 5282 }, { "epoch": 0.77, "grad_norm": 4.854154586791992, "learning_rate": 1.7615150541994913e-06, "loss": 0.6553, "step": 5283 }, { "epoch": 0.77, "grad_norm": 5.4064621925354, "learning_rate": 1.7614124542458497e-06, "loss": 0.6496, "step": 5284 }, { "epoch": 0.77, "grad_norm": 6.073987007141113, "learning_rate": 1.7613098352162568e-06, "loss": 0.7026, "step": 5285 }, { "epoch": 0.77, "grad_norm": 5.847560405731201, "learning_rate": 1.7612071971132827e-06, "loss": 0.7198, "step": 5286 }, { "epoch": 0.77, "grad_norm": 5.932650089263916, "learning_rate": 1.761104539939499e-06, "loss": 0.7261, "step": 5287 }, { "epoch": 0.77, "grad_norm": 5.498531341552734, "learning_rate": 1.7610018636974779e-06, "loss": 0.7068, "step": 5288 }, { "epoch": 0.77, "grad_norm": 5.623023509979248, "learning_rate": 1.7608991683897915e-06, "loss": 0.6482, "step": 5289 }, { "epoch": 0.77, "grad_norm": 5.283902168273926, "learning_rate": 1.760796454019013e-06, "loss": 0.7106, "step": 5290 }, { "epoch": 0.77, "grad_norm": 5.2934088706970215, "learning_rate": 1.760693720587715e-06, "loss": 0.6357, "step": 5291 }, { "epoch": 0.77, "grad_norm": 5.654341697692871, "learning_rate": 1.7605909680984724e-06, "loss": 0.7101, "step": 5292 }, { "epoch": 0.77, "grad_norm": 5.443848133087158, "learning_rate": 1.7604881965538584e-06, "loss": 0.7059, "step": 5293 }, { "epoch": 0.77, "grad_norm": 5.317073345184326, "learning_rate": 1.7603854059564487e-06, "loss": 0.6882, "step": 5294 }, { "epoch": 0.77, "grad_norm": 5.0415544509887695, "learning_rate": 1.7602825963088181e-06, "loss": 0.6875, "step": 5295 }, { "epoch": 0.77, "grad_norm": 5.341383934020996, "learning_rate": 1.7601797676135425e-06, "loss": 0.5838, "step": 5296 }, { "epoch": 0.77, "grad_norm": 4.744532585144043, "learning_rate": 1.7600769198731977e-06, "loss": 0.6849, "step": 5297 }, { "epoch": 0.77, "grad_norm": 5.951932430267334, "learning_rate": 1.7599740530903607e-06, "loss": 0.7586, "step": 5298 }, { "epoch": 0.77, "grad_norm": 5.2701334953308105, "learning_rate": 1.7598711672676088e-06, "loss": 0.7124, "step": 5299 }, { "epoch": 0.77, "grad_norm": 5.691628456115723, "learning_rate": 1.7597682624075193e-06, "loss": 0.8034, "step": 5300 }, { "epoch": 0.77, "grad_norm": 5.920676231384277, "learning_rate": 1.7596653385126702e-06, "loss": 0.74, "step": 5301 }, { "epoch": 0.77, "grad_norm": 5.727597713470459, "learning_rate": 1.7595623955856408e-06, "loss": 0.6182, "step": 5302 }, { "epoch": 0.77, "grad_norm": 5.800874710083008, "learning_rate": 1.7594594336290098e-06, "loss": 0.7667, "step": 5303 }, { "epoch": 0.77, "grad_norm": 5.610649108886719, "learning_rate": 1.7593564526453566e-06, "loss": 0.8157, "step": 5304 }, { "epoch": 0.77, "grad_norm": 5.559182643890381, "learning_rate": 1.7592534526372612e-06, "loss": 0.6334, "step": 5305 }, { "epoch": 0.77, "grad_norm": 5.7466020584106445, "learning_rate": 1.759150433607304e-06, "loss": 0.6796, "step": 5306 }, { "epoch": 0.77, "grad_norm": 5.789839267730713, "learning_rate": 1.7590473955580665e-06, "loss": 0.7733, "step": 5307 }, { "epoch": 0.77, "grad_norm": 5.9650044441223145, "learning_rate": 1.7589443384921299e-06, "loss": 0.7187, "step": 5308 }, { "epoch": 0.77, "grad_norm": 6.07756233215332, "learning_rate": 1.7588412624120758e-06, "loss": 0.7501, "step": 5309 }, { "epoch": 0.77, "grad_norm": 5.8236565589904785, "learning_rate": 1.7587381673204868e-06, "loss": 0.8265, "step": 5310 }, { "epoch": 0.77, "grad_norm": 4.856908321380615, "learning_rate": 1.758635053219946e-06, "loss": 0.6492, "step": 5311 }, { "epoch": 0.77, "grad_norm": 5.325366973876953, "learning_rate": 1.758531920113037e-06, "loss": 0.6845, "step": 5312 }, { "epoch": 0.77, "grad_norm": 5.423507213592529, "learning_rate": 1.7584287680023426e-06, "loss": 0.6718, "step": 5313 }, { "epoch": 0.77, "grad_norm": 5.356166839599609, "learning_rate": 1.7583255968904479e-06, "loss": 0.7729, "step": 5314 }, { "epoch": 0.77, "grad_norm": 5.594882488250732, "learning_rate": 1.7582224067799377e-06, "loss": 0.7057, "step": 5315 }, { "epoch": 0.77, "grad_norm": 5.571303844451904, "learning_rate": 1.758119197673397e-06, "loss": 0.6383, "step": 5316 }, { "epoch": 0.77, "grad_norm": 5.491313457489014, "learning_rate": 1.7580159695734114e-06, "loss": 0.742, "step": 5317 }, { "epoch": 0.77, "grad_norm": 6.1121296882629395, "learning_rate": 1.7579127224825673e-06, "loss": 0.7423, "step": 5318 }, { "epoch": 0.77, "grad_norm": 6.3717193603515625, "learning_rate": 1.7578094564034519e-06, "loss": 0.7147, "step": 5319 }, { "epoch": 0.77, "grad_norm": 4.805378437042236, "learning_rate": 1.7577061713386514e-06, "loss": 0.6429, "step": 5320 }, { "epoch": 0.77, "grad_norm": 5.306426048278809, "learning_rate": 1.757602867290754e-06, "loss": 0.7292, "step": 5321 }, { "epoch": 0.77, "grad_norm": 5.393239498138428, "learning_rate": 1.7574995442623479e-06, "loss": 0.694, "step": 5322 }, { "epoch": 0.77, "grad_norm": 5.354186058044434, "learning_rate": 1.7573962022560215e-06, "loss": 0.6909, "step": 5323 }, { "epoch": 0.77, "grad_norm": 6.960894584655762, "learning_rate": 1.757292841274364e-06, "loss": 0.8424, "step": 5324 }, { "epoch": 0.77, "grad_norm": 5.735565185546875, "learning_rate": 1.7571894613199644e-06, "loss": 0.701, "step": 5325 }, { "epoch": 0.77, "grad_norm": 5.895358085632324, "learning_rate": 1.7570860623954135e-06, "loss": 0.7041, "step": 5326 }, { "epoch": 0.77, "grad_norm": 5.724328994750977, "learning_rate": 1.7569826445033013e-06, "loss": 0.7891, "step": 5327 }, { "epoch": 0.77, "grad_norm": 6.133674621582031, "learning_rate": 1.7568792076462191e-06, "loss": 0.7881, "step": 5328 }, { "epoch": 0.77, "grad_norm": 6.341805458068848, "learning_rate": 1.7567757518267578e-06, "loss": 0.762, "step": 5329 }, { "epoch": 0.77, "grad_norm": 6.076075077056885, "learning_rate": 1.7566722770475103e-06, "loss": 0.7198, "step": 5330 }, { "epoch": 0.77, "grad_norm": 6.47216272354126, "learning_rate": 1.7565687833110678e-06, "loss": 0.7502, "step": 5331 }, { "epoch": 0.77, "grad_norm": 5.886032581329346, "learning_rate": 1.756465270620024e-06, "loss": 0.7954, "step": 5332 }, { "epoch": 0.77, "grad_norm": 6.5186381340026855, "learning_rate": 1.7563617389769716e-06, "loss": 0.8633, "step": 5333 }, { "epoch": 0.77, "grad_norm": 5.80206298828125, "learning_rate": 1.756258188384505e-06, "loss": 0.6768, "step": 5334 }, { "epoch": 0.77, "grad_norm": 5.529524326324463, "learning_rate": 1.7561546188452185e-06, "loss": 0.6389, "step": 5335 }, { "epoch": 0.77, "grad_norm": 5.581200122833252, "learning_rate": 1.7560510303617066e-06, "loss": 0.7194, "step": 5336 }, { "epoch": 0.77, "grad_norm": 5.491984844207764, "learning_rate": 1.7559474229365644e-06, "loss": 0.7385, "step": 5337 }, { "epoch": 0.77, "grad_norm": 5.592919826507568, "learning_rate": 1.755843796572388e-06, "loss": 0.7211, "step": 5338 }, { "epoch": 0.77, "grad_norm": 5.179227828979492, "learning_rate": 1.755740151271773e-06, "loss": 0.7112, "step": 5339 }, { "epoch": 0.77, "grad_norm": 5.887372016906738, "learning_rate": 1.755636487037317e-06, "loss": 0.7791, "step": 5340 }, { "epoch": 0.77, "grad_norm": 5.426767349243164, "learning_rate": 1.7555328038716162e-06, "loss": 0.6861, "step": 5341 }, { "epoch": 0.78, "grad_norm": 5.466141223907471, "learning_rate": 1.7554291017772687e-06, "loss": 0.6945, "step": 5342 }, { "epoch": 0.78, "grad_norm": 4.787928581237793, "learning_rate": 1.7553253807568725e-06, "loss": 0.7021, "step": 5343 }, { "epoch": 0.78, "grad_norm": 5.850632190704346, "learning_rate": 1.7552216408130264e-06, "loss": 0.7565, "step": 5344 }, { "epoch": 0.78, "grad_norm": 5.247949600219727, "learning_rate": 1.7551178819483289e-06, "loss": 0.7852, "step": 5345 }, { "epoch": 0.78, "grad_norm": 5.060332298278809, "learning_rate": 1.7550141041653798e-06, "loss": 0.6608, "step": 5346 }, { "epoch": 0.78, "grad_norm": 5.524642467498779, "learning_rate": 1.7549103074667792e-06, "loss": 0.6589, "step": 5347 }, { "epoch": 0.78, "grad_norm": 5.2719645500183105, "learning_rate": 1.7548064918551275e-06, "loss": 0.6828, "step": 5348 }, { "epoch": 0.78, "grad_norm": 5.272633075714111, "learning_rate": 1.7547026573330255e-06, "loss": 0.6684, "step": 5349 }, { "epoch": 0.78, "grad_norm": 5.2094645500183105, "learning_rate": 1.7545988039030749e-06, "loss": 0.6736, "step": 5350 }, { "epoch": 0.78, "grad_norm": 6.419463634490967, "learning_rate": 1.754494931567877e-06, "loss": 0.7498, "step": 5351 }, { "epoch": 0.78, "grad_norm": 5.681466102600098, "learning_rate": 1.7543910403300347e-06, "loss": 0.8375, "step": 5352 }, { "epoch": 0.78, "grad_norm": 6.467987060546875, "learning_rate": 1.7542871301921505e-06, "loss": 0.803, "step": 5353 }, { "epoch": 0.78, "grad_norm": 5.633448600769043, "learning_rate": 1.754183201156828e-06, "loss": 0.6577, "step": 5354 }, { "epoch": 0.78, "grad_norm": 5.597508907318115, "learning_rate": 1.7540792532266706e-06, "loss": 0.7604, "step": 5355 }, { "epoch": 0.78, "grad_norm": 5.903875827789307, "learning_rate": 1.7539752864042829e-06, "loss": 0.7327, "step": 5356 }, { "epoch": 0.78, "grad_norm": 5.5011796951293945, "learning_rate": 1.7538713006922696e-06, "loss": 0.7176, "step": 5357 }, { "epoch": 0.78, "grad_norm": 5.921211242675781, "learning_rate": 1.7537672960932359e-06, "loss": 0.6415, "step": 5358 }, { "epoch": 0.78, "grad_norm": 6.3940958976745605, "learning_rate": 1.753663272609787e-06, "loss": 0.7639, "step": 5359 }, { "epoch": 0.78, "grad_norm": 5.817069053649902, "learning_rate": 1.7535592302445295e-06, "loss": 0.7138, "step": 5360 }, { "epoch": 0.78, "grad_norm": 5.681139945983887, "learning_rate": 1.75345516900007e-06, "loss": 0.7789, "step": 5361 }, { "epoch": 0.78, "grad_norm": 5.415027141571045, "learning_rate": 1.7533510888790153e-06, "loss": 0.7146, "step": 5362 }, { "epoch": 0.78, "grad_norm": 5.6775431632995605, "learning_rate": 1.7532469898839732e-06, "loss": 0.6917, "step": 5363 }, { "epoch": 0.78, "grad_norm": 5.7427568435668945, "learning_rate": 1.7531428720175517e-06, "loss": 0.7804, "step": 5364 }, { "epoch": 0.78, "grad_norm": 5.43237829208374, "learning_rate": 1.7530387352823593e-06, "loss": 0.6641, "step": 5365 }, { "epoch": 0.78, "grad_norm": 5.452252388000488, "learning_rate": 1.7529345796810047e-06, "loss": 0.6468, "step": 5366 }, { "epoch": 0.78, "grad_norm": 5.605525970458984, "learning_rate": 1.7528304052160978e-06, "loss": 0.7654, "step": 5367 }, { "epoch": 0.78, "grad_norm": 5.6865034103393555, "learning_rate": 1.7527262118902484e-06, "loss": 0.6702, "step": 5368 }, { "epoch": 0.78, "grad_norm": 5.370187759399414, "learning_rate": 1.7526219997060668e-06, "loss": 0.6921, "step": 5369 }, { "epoch": 0.78, "grad_norm": 5.1891045570373535, "learning_rate": 1.7525177686661637e-06, "loss": 0.6308, "step": 5370 }, { "epoch": 0.78, "grad_norm": 5.965086936950684, "learning_rate": 1.7524135187731506e-06, "loss": 0.675, "step": 5371 }, { "epoch": 0.78, "grad_norm": 5.2234392166137695, "learning_rate": 1.7523092500296394e-06, "loss": 0.6632, "step": 5372 }, { "epoch": 0.78, "grad_norm": 4.974081993103027, "learning_rate": 1.7522049624382425e-06, "loss": 0.6642, "step": 5373 }, { "epoch": 0.78, "grad_norm": 6.2901692390441895, "learning_rate": 1.752100656001572e-06, "loss": 0.7956, "step": 5374 }, { "epoch": 0.78, "grad_norm": 5.890710353851318, "learning_rate": 1.751996330722242e-06, "loss": 0.6707, "step": 5375 }, { "epoch": 0.78, "grad_norm": 5.2485833168029785, "learning_rate": 1.751891986602866e-06, "loss": 0.6789, "step": 5376 }, { "epoch": 0.78, "grad_norm": 6.160521507263184, "learning_rate": 1.7517876236460576e-06, "loss": 0.7013, "step": 5377 }, { "epoch": 0.78, "grad_norm": 5.192419528961182, "learning_rate": 1.7516832418544318e-06, "loss": 0.6552, "step": 5378 }, { "epoch": 0.78, "grad_norm": 5.487624645233154, "learning_rate": 1.751578841230604e-06, "loss": 0.6101, "step": 5379 }, { "epoch": 0.78, "grad_norm": 6.452903747558594, "learning_rate": 1.7514744217771893e-06, "loss": 0.7451, "step": 5380 }, { "epoch": 0.78, "grad_norm": 5.976803779602051, "learning_rate": 1.7513699834968038e-06, "loss": 0.7994, "step": 5381 }, { "epoch": 0.78, "grad_norm": 6.015270709991455, "learning_rate": 1.7512655263920646e-06, "loss": 0.7816, "step": 5382 }, { "epoch": 0.78, "grad_norm": 5.766409397125244, "learning_rate": 1.7511610504655882e-06, "loss": 0.8029, "step": 5383 }, { "epoch": 0.78, "grad_norm": 5.74454927444458, "learning_rate": 1.7510565557199923e-06, "loss": 0.7269, "step": 5384 }, { "epoch": 0.78, "grad_norm": 5.695233345031738, "learning_rate": 1.7509520421578945e-06, "loss": 0.7208, "step": 5385 }, { "epoch": 0.78, "grad_norm": 5.773337364196777, "learning_rate": 1.7508475097819136e-06, "loss": 0.7719, "step": 5386 }, { "epoch": 0.78, "grad_norm": 5.573683261871338, "learning_rate": 1.7507429585946682e-06, "loss": 0.6661, "step": 5387 }, { "epoch": 0.78, "grad_norm": 5.1777825355529785, "learning_rate": 1.750638388598778e-06, "loss": 0.7639, "step": 5388 }, { "epoch": 0.78, "grad_norm": 5.180001735687256, "learning_rate": 1.7505337997968625e-06, "loss": 0.6925, "step": 5389 }, { "epoch": 0.78, "grad_norm": 5.646298885345459, "learning_rate": 1.7504291921915421e-06, "loss": 0.65, "step": 5390 }, { "epoch": 0.78, "grad_norm": 6.0673136711120605, "learning_rate": 1.7503245657854378e-06, "loss": 0.7646, "step": 5391 }, { "epoch": 0.78, "grad_norm": 6.743109226226807, "learning_rate": 1.7502199205811703e-06, "loss": 0.7613, "step": 5392 }, { "epoch": 0.78, "grad_norm": 5.76963996887207, "learning_rate": 1.750115256581362e-06, "loss": 0.6603, "step": 5393 }, { "epoch": 0.78, "grad_norm": 6.1153154373168945, "learning_rate": 1.7500105737886346e-06, "loss": 0.7234, "step": 5394 }, { "epoch": 0.78, "grad_norm": 5.6269755363464355, "learning_rate": 1.7499058722056108e-06, "loss": 0.6665, "step": 5395 }, { "epoch": 0.78, "grad_norm": 6.062260150909424, "learning_rate": 1.749801151834914e-06, "loss": 0.6576, "step": 5396 }, { "epoch": 0.78, "grad_norm": 5.348452568054199, "learning_rate": 1.7496964126791678e-06, "loss": 0.6748, "step": 5397 }, { "epoch": 0.78, "grad_norm": 5.1099419593811035, "learning_rate": 1.7495916547409958e-06, "loss": 0.6921, "step": 5398 }, { "epoch": 0.78, "grad_norm": 5.3251752853393555, "learning_rate": 1.7494868780230232e-06, "loss": 0.7368, "step": 5399 }, { "epoch": 0.78, "grad_norm": 5.166143417358398, "learning_rate": 1.7493820825278745e-06, "loss": 0.7676, "step": 5400 }, { "epoch": 0.78, "grad_norm": 5.570885181427002, "learning_rate": 1.7492772682581753e-06, "loss": 0.7306, "step": 5401 }, { "epoch": 0.78, "grad_norm": 5.772548198699951, "learning_rate": 1.7491724352165514e-06, "loss": 0.6994, "step": 5402 }, { "epoch": 0.78, "grad_norm": 5.470938682556152, "learning_rate": 1.7490675834056296e-06, "loss": 0.664, "step": 5403 }, { "epoch": 0.78, "grad_norm": 5.898911952972412, "learning_rate": 1.7489627128280367e-06, "loss": 0.678, "step": 5404 }, { "epoch": 0.78, "grad_norm": 5.7435383796691895, "learning_rate": 1.7488578234863997e-06, "loss": 0.7362, "step": 5405 }, { "epoch": 0.78, "grad_norm": 5.971429824829102, "learning_rate": 1.7487529153833469e-06, "loss": 0.7505, "step": 5406 }, { "epoch": 0.78, "grad_norm": 6.053066730499268, "learning_rate": 1.7486479885215064e-06, "loss": 0.8065, "step": 5407 }, { "epoch": 0.78, "grad_norm": 5.598404407501221, "learning_rate": 1.748543042903507e-06, "loss": 0.7186, "step": 5408 }, { "epoch": 0.78, "grad_norm": 6.07727575302124, "learning_rate": 1.748438078531978e-06, "loss": 0.7312, "step": 5409 }, { "epoch": 0.78, "grad_norm": 5.024962425231934, "learning_rate": 1.7483330954095488e-06, "loss": 0.7284, "step": 5410 }, { "epoch": 0.79, "grad_norm": 6.642855644226074, "learning_rate": 1.7482280935388497e-06, "loss": 0.6225, "step": 5411 }, { "epoch": 0.79, "grad_norm": 6.216273784637451, "learning_rate": 1.7481230729225117e-06, "loss": 0.8347, "step": 5412 }, { "epoch": 0.79, "grad_norm": 4.926931381225586, "learning_rate": 1.7480180335631656e-06, "loss": 0.6839, "step": 5413 }, { "epoch": 0.79, "grad_norm": 5.779595375061035, "learning_rate": 1.7479129754634433e-06, "loss": 0.7235, "step": 5414 }, { "epoch": 0.79, "grad_norm": 5.3095197677612305, "learning_rate": 1.7478078986259763e-06, "loss": 0.7894, "step": 5415 }, { "epoch": 0.79, "grad_norm": 4.9204421043396, "learning_rate": 1.7477028030533977e-06, "loss": 0.6484, "step": 5416 }, { "epoch": 0.79, "grad_norm": 6.1812744140625, "learning_rate": 1.7475976887483401e-06, "loss": 0.7381, "step": 5417 }, { "epoch": 0.79, "grad_norm": 5.539719581604004, "learning_rate": 1.7474925557134372e-06, "loss": 0.72, "step": 5418 }, { "epoch": 0.79, "grad_norm": 6.029248237609863, "learning_rate": 1.7473874039513227e-06, "loss": 0.6725, "step": 5419 }, { "epoch": 0.79, "grad_norm": 5.071535587310791, "learning_rate": 1.7472822334646313e-06, "loss": 0.6909, "step": 5420 }, { "epoch": 0.79, "grad_norm": 5.548872947692871, "learning_rate": 1.7471770442559978e-06, "loss": 0.6854, "step": 5421 }, { "epoch": 0.79, "grad_norm": 5.50103759765625, "learning_rate": 1.7470718363280574e-06, "loss": 0.657, "step": 5422 }, { "epoch": 0.79, "grad_norm": 5.853391647338867, "learning_rate": 1.7469666096834462e-06, "loss": 0.7266, "step": 5423 }, { "epoch": 0.79, "grad_norm": 6.2248945236206055, "learning_rate": 1.7468613643248e-06, "loss": 0.72, "step": 5424 }, { "epoch": 0.79, "grad_norm": 6.227323532104492, "learning_rate": 1.746756100254756e-06, "loss": 0.7488, "step": 5425 }, { "epoch": 0.79, "grad_norm": 5.503910541534424, "learning_rate": 1.7466508174759513e-06, "loss": 0.6335, "step": 5426 }, { "epoch": 0.79, "grad_norm": 6.416053771972656, "learning_rate": 1.7465455159910234e-06, "loss": 0.7573, "step": 5427 }, { "epoch": 0.79, "grad_norm": 6.628161907196045, "learning_rate": 1.7464401958026107e-06, "loss": 0.7545, "step": 5428 }, { "epoch": 0.79, "grad_norm": 5.583790302276611, "learning_rate": 1.7463348569133517e-06, "loss": 0.6909, "step": 5429 }, { "epoch": 0.79, "grad_norm": 5.384581089019775, "learning_rate": 1.7462294993258856e-06, "loss": 0.7266, "step": 5430 }, { "epoch": 0.79, "grad_norm": 5.793221473693848, "learning_rate": 1.7461241230428519e-06, "loss": 0.7159, "step": 5431 }, { "epoch": 0.79, "grad_norm": 5.1128950119018555, "learning_rate": 1.7460187280668905e-06, "loss": 0.7249, "step": 5432 }, { "epoch": 0.79, "grad_norm": 5.342444896697998, "learning_rate": 1.7459133144006422e-06, "loss": 0.7707, "step": 5433 }, { "epoch": 0.79, "grad_norm": 5.53920841217041, "learning_rate": 1.7458078820467476e-06, "loss": 0.7231, "step": 5434 }, { "epoch": 0.79, "grad_norm": 5.081322193145752, "learning_rate": 1.7457024310078485e-06, "loss": 0.6042, "step": 5435 }, { "epoch": 0.79, "grad_norm": 5.5654520988464355, "learning_rate": 1.7455969612865866e-06, "loss": 0.7275, "step": 5436 }, { "epoch": 0.79, "grad_norm": 5.873206615447998, "learning_rate": 1.7454914728856041e-06, "loss": 0.7808, "step": 5437 }, { "epoch": 0.79, "grad_norm": 4.912949562072754, "learning_rate": 1.7453859658075443e-06, "loss": 0.7424, "step": 5438 }, { "epoch": 0.79, "grad_norm": 6.031919002532959, "learning_rate": 1.74528044005505e-06, "loss": 0.8145, "step": 5439 }, { "epoch": 0.79, "grad_norm": 5.888943195343018, "learning_rate": 1.7451748956307654e-06, "loss": 0.678, "step": 5440 }, { "epoch": 0.79, "grad_norm": 5.883004665374756, "learning_rate": 1.7450693325373344e-06, "loss": 0.7912, "step": 5441 }, { "epoch": 0.79, "grad_norm": 5.041121006011963, "learning_rate": 1.7449637507774022e-06, "loss": 0.7339, "step": 5442 }, { "epoch": 0.79, "grad_norm": 5.752381324768066, "learning_rate": 1.7448581503536132e-06, "loss": 0.7458, "step": 5443 }, { "epoch": 0.79, "grad_norm": 4.736546993255615, "learning_rate": 1.744752531268614e-06, "loss": 0.716, "step": 5444 }, { "epoch": 0.79, "grad_norm": 5.208593845367432, "learning_rate": 1.74464689352505e-06, "loss": 0.69, "step": 5445 }, { "epoch": 0.79, "grad_norm": 5.080650329589844, "learning_rate": 1.744541237125568e-06, "loss": 0.6775, "step": 5446 }, { "epoch": 0.79, "grad_norm": 5.914504528045654, "learning_rate": 1.744435562072815e-06, "loss": 0.6065, "step": 5447 }, { "epoch": 0.79, "grad_norm": 5.715582847595215, "learning_rate": 1.7443298683694388e-06, "loss": 0.7262, "step": 5448 }, { "epoch": 0.79, "grad_norm": 5.635599613189697, "learning_rate": 1.7442241560180868e-06, "loss": 0.7208, "step": 5449 }, { "epoch": 0.79, "grad_norm": 5.219718933105469, "learning_rate": 1.744118425021408e-06, "loss": 0.7249, "step": 5450 }, { "epoch": 0.79, "grad_norm": 5.317111968994141, "learning_rate": 1.7440126753820511e-06, "loss": 0.7171, "step": 5451 }, { "epoch": 0.79, "grad_norm": 6.032434940338135, "learning_rate": 1.7439069071026658e-06, "loss": 0.6745, "step": 5452 }, { "epoch": 0.79, "grad_norm": 5.767712593078613, "learning_rate": 1.7438011201859013e-06, "loss": 0.7311, "step": 5453 }, { "epoch": 0.79, "grad_norm": 5.152134418487549, "learning_rate": 1.7436953146344085e-06, "loss": 0.6821, "step": 5454 }, { "epoch": 0.79, "grad_norm": 5.937392711639404, "learning_rate": 1.7435894904508376e-06, "loss": 0.6442, "step": 5455 }, { "epoch": 0.79, "grad_norm": 5.541552543640137, "learning_rate": 1.7434836476378408e-06, "loss": 0.742, "step": 5456 }, { "epoch": 0.79, "grad_norm": 5.639612674713135, "learning_rate": 1.7433777861980687e-06, "loss": 0.6535, "step": 5457 }, { "epoch": 0.79, "grad_norm": 5.135616779327393, "learning_rate": 1.7432719061341743e-06, "loss": 0.6595, "step": 5458 }, { "epoch": 0.79, "grad_norm": 6.455742835998535, "learning_rate": 1.7431660074488099e-06, "loss": 0.8693, "step": 5459 }, { "epoch": 0.79, "grad_norm": 5.533550262451172, "learning_rate": 1.743060090144629e-06, "loss": 0.721, "step": 5460 }, { "epoch": 0.79, "grad_norm": 5.294514179229736, "learning_rate": 1.7429541542242846e-06, "loss": 0.683, "step": 5461 }, { "epoch": 0.79, "grad_norm": 5.355113506317139, "learning_rate": 1.742848199690431e-06, "loss": 0.6845, "step": 5462 }, { "epoch": 0.79, "grad_norm": 6.324779033660889, "learning_rate": 1.7427422265457227e-06, "loss": 0.6937, "step": 5463 }, { "epoch": 0.79, "grad_norm": 5.310692310333252, "learning_rate": 1.742636234792815e-06, "loss": 0.7293, "step": 5464 }, { "epoch": 0.79, "grad_norm": 6.25261116027832, "learning_rate": 1.742530224434363e-06, "loss": 0.6917, "step": 5465 }, { "epoch": 0.79, "grad_norm": 6.12930965423584, "learning_rate": 1.7424241954730226e-06, "loss": 0.7284, "step": 5466 }, { "epoch": 0.79, "grad_norm": 6.141483306884766, "learning_rate": 1.7423181479114501e-06, "loss": 0.768, "step": 5467 }, { "epoch": 0.79, "grad_norm": 5.285187244415283, "learning_rate": 1.742212081752303e-06, "loss": 0.6752, "step": 5468 }, { "epoch": 0.79, "grad_norm": 5.339865207672119, "learning_rate": 1.742105996998238e-06, "loss": 0.6872, "step": 5469 }, { "epoch": 0.79, "grad_norm": 5.039766788482666, "learning_rate": 1.7419998936519131e-06, "loss": 0.6706, "step": 5470 }, { "epoch": 0.79, "grad_norm": 5.312576770782471, "learning_rate": 1.7418937717159863e-06, "loss": 0.7084, "step": 5471 }, { "epoch": 0.79, "grad_norm": 5.324744701385498, "learning_rate": 1.7417876311931169e-06, "loss": 0.6414, "step": 5472 }, { "epoch": 0.79, "grad_norm": 5.014306545257568, "learning_rate": 1.741681472085963e-06, "loss": 0.6938, "step": 5473 }, { "epoch": 0.79, "grad_norm": 5.83002233505249, "learning_rate": 1.7415752943971853e-06, "loss": 0.6356, "step": 5474 }, { "epoch": 0.79, "grad_norm": 5.467231750488281, "learning_rate": 1.7414690981294435e-06, "loss": 0.6657, "step": 5475 }, { "epoch": 0.79, "grad_norm": 5.736600399017334, "learning_rate": 1.7413628832853983e-06, "loss": 0.7369, "step": 5476 }, { "epoch": 0.79, "grad_norm": 5.088009834289551, "learning_rate": 1.7412566498677106e-06, "loss": 0.7192, "step": 5477 }, { "epoch": 0.79, "grad_norm": 5.3377766609191895, "learning_rate": 1.7411503978790418e-06, "loss": 0.7147, "step": 5478 }, { "epoch": 0.79, "grad_norm": 5.868986129760742, "learning_rate": 1.7410441273220542e-06, "loss": 0.6767, "step": 5479 }, { "epoch": 0.8, "grad_norm": 5.918879508972168, "learning_rate": 1.7409378381994102e-06, "loss": 0.714, "step": 5480 }, { "epoch": 0.8, "grad_norm": 5.734932899475098, "learning_rate": 1.7408315305137724e-06, "loss": 0.6715, "step": 5481 }, { "epoch": 0.8, "grad_norm": 5.774941921234131, "learning_rate": 1.7407252042678041e-06, "loss": 0.7447, "step": 5482 }, { "epoch": 0.8, "grad_norm": 5.714343547821045, "learning_rate": 1.74061885946417e-06, "loss": 0.7252, "step": 5483 }, { "epoch": 0.8, "grad_norm": 5.826413631439209, "learning_rate": 1.740512496105533e-06, "loss": 0.6364, "step": 5484 }, { "epoch": 0.8, "grad_norm": 4.843405246734619, "learning_rate": 1.740406114194559e-06, "loss": 0.6829, "step": 5485 }, { "epoch": 0.8, "grad_norm": 5.50135612487793, "learning_rate": 1.7402997137339128e-06, "loss": 0.7362, "step": 5486 }, { "epoch": 0.8, "grad_norm": 5.412383079528809, "learning_rate": 1.7401932947262601e-06, "loss": 0.674, "step": 5487 }, { "epoch": 0.8, "grad_norm": 5.724944114685059, "learning_rate": 1.7400868571742672e-06, "loss": 0.6529, "step": 5488 }, { "epoch": 0.8, "grad_norm": 5.516857147216797, "learning_rate": 1.7399804010806003e-06, "loss": 0.7106, "step": 5489 }, { "epoch": 0.8, "grad_norm": 5.4319233894348145, "learning_rate": 1.7398739264479273e-06, "loss": 0.732, "step": 5490 }, { "epoch": 0.8, "grad_norm": 6.763281345367432, "learning_rate": 1.739767433278915e-06, "loss": 0.8256, "step": 5491 }, { "epoch": 0.8, "grad_norm": 6.133184909820557, "learning_rate": 1.7396609215762315e-06, "loss": 0.7537, "step": 5492 }, { "epoch": 0.8, "grad_norm": 5.406322479248047, "learning_rate": 1.7395543913425455e-06, "loss": 0.7673, "step": 5493 }, { "epoch": 0.8, "grad_norm": 5.603578090667725, "learning_rate": 1.7394478425805258e-06, "loss": 0.6773, "step": 5494 }, { "epoch": 0.8, "grad_norm": 6.02548360824585, "learning_rate": 1.7393412752928423e-06, "loss": 0.7755, "step": 5495 }, { "epoch": 0.8, "grad_norm": 5.749173164367676, "learning_rate": 1.739234689482164e-06, "loss": 0.714, "step": 5496 }, { "epoch": 0.8, "grad_norm": 5.149771690368652, "learning_rate": 1.7391280851511616e-06, "loss": 0.6905, "step": 5497 }, { "epoch": 0.8, "grad_norm": 5.700586795806885, "learning_rate": 1.7390214623025063e-06, "loss": 0.6875, "step": 5498 }, { "epoch": 0.8, "grad_norm": 5.582947731018066, "learning_rate": 1.738914820938869e-06, "loss": 0.7677, "step": 5499 }, { "epoch": 0.8, "grad_norm": 4.792266368865967, "learning_rate": 1.7388081610629215e-06, "loss": 0.6192, "step": 5500 }, { "epoch": 0.8, "grad_norm": 5.015369415283203, "learning_rate": 1.7387014826773359e-06, "loss": 0.6633, "step": 5501 }, { "epoch": 0.8, "grad_norm": 5.501717567443848, "learning_rate": 1.7385947857847847e-06, "loss": 0.7314, "step": 5502 }, { "epoch": 0.8, "grad_norm": 5.24373197555542, "learning_rate": 1.7384880703879414e-06, "loss": 0.7035, "step": 5503 }, { "epoch": 0.8, "grad_norm": 5.427967071533203, "learning_rate": 1.7383813364894798e-06, "loss": 0.7203, "step": 5504 }, { "epoch": 0.8, "grad_norm": 5.234959125518799, "learning_rate": 1.738274584092073e-06, "loss": 0.6712, "step": 5505 }, { "epoch": 0.8, "grad_norm": 5.790365219116211, "learning_rate": 1.7381678131983963e-06, "loss": 0.7049, "step": 5506 }, { "epoch": 0.8, "grad_norm": 5.3404316902160645, "learning_rate": 1.7380610238111244e-06, "loss": 0.64, "step": 5507 }, { "epoch": 0.8, "grad_norm": 6.6809163093566895, "learning_rate": 1.737954215932933e-06, "loss": 0.787, "step": 5508 }, { "epoch": 0.8, "grad_norm": 6.356119155883789, "learning_rate": 1.7378473895664973e-06, "loss": 0.6908, "step": 5509 }, { "epoch": 0.8, "grad_norm": 6.805253505706787, "learning_rate": 1.7377405447144943e-06, "loss": 0.7289, "step": 5510 }, { "epoch": 0.8, "grad_norm": 5.644802570343018, "learning_rate": 1.7376336813796008e-06, "loss": 0.7003, "step": 5511 }, { "epoch": 0.8, "grad_norm": 5.972832679748535, "learning_rate": 1.737526799564494e-06, "loss": 0.7062, "step": 5512 }, { "epoch": 0.8, "grad_norm": 4.944746017456055, "learning_rate": 1.7374198992718514e-06, "loss": 0.5559, "step": 5513 }, { "epoch": 0.8, "grad_norm": 5.13455867767334, "learning_rate": 1.7373129805043515e-06, "loss": 0.6811, "step": 5514 }, { "epoch": 0.8, "grad_norm": 5.5168232917785645, "learning_rate": 1.7372060432646729e-06, "loss": 0.6451, "step": 5515 }, { "epoch": 0.8, "grad_norm": 5.195468902587891, "learning_rate": 1.7370990875554944e-06, "loss": 0.6581, "step": 5516 }, { "epoch": 0.8, "grad_norm": 5.9659905433654785, "learning_rate": 1.7369921133794965e-06, "loss": 0.8063, "step": 5517 }, { "epoch": 0.8, "grad_norm": 5.5510945320129395, "learning_rate": 1.7368851207393584e-06, "loss": 0.7383, "step": 5518 }, { "epoch": 0.8, "grad_norm": 5.9253950119018555, "learning_rate": 1.7367781096377607e-06, "loss": 0.7477, "step": 5519 }, { "epoch": 0.8, "grad_norm": 4.99484920501709, "learning_rate": 1.7366710800773849e-06, "loss": 0.6973, "step": 5520 }, { "epoch": 0.8, "grad_norm": 6.034415245056152, "learning_rate": 1.736564032060912e-06, "loss": 0.7167, "step": 5521 }, { "epoch": 0.8, "grad_norm": 5.315212249755859, "learning_rate": 1.7364569655910243e-06, "loss": 0.6989, "step": 5522 }, { "epoch": 0.8, "grad_norm": 5.431301593780518, "learning_rate": 1.7363498806704038e-06, "loss": 0.7865, "step": 5523 }, { "epoch": 0.8, "grad_norm": 5.922562122344971, "learning_rate": 1.7362427773017333e-06, "loss": 0.7726, "step": 5524 }, { "epoch": 0.8, "grad_norm": 5.446007251739502, "learning_rate": 1.7361356554876964e-06, "loss": 0.7252, "step": 5525 }, { "epoch": 0.8, "grad_norm": 5.522635459899902, "learning_rate": 1.7360285152309768e-06, "loss": 0.6706, "step": 5526 }, { "epoch": 0.8, "grad_norm": 6.244074821472168, "learning_rate": 1.7359213565342588e-06, "loss": 0.6615, "step": 5527 }, { "epoch": 0.8, "grad_norm": 5.680330753326416, "learning_rate": 1.7358141794002265e-06, "loss": 0.7599, "step": 5528 }, { "epoch": 0.8, "grad_norm": 6.146098613739014, "learning_rate": 1.735706983831566e-06, "loss": 0.743, "step": 5529 }, { "epoch": 0.8, "grad_norm": 5.7862725257873535, "learning_rate": 1.735599769830962e-06, "loss": 0.7495, "step": 5530 }, { "epoch": 0.8, "grad_norm": 5.936056137084961, "learning_rate": 1.7354925374011015e-06, "loss": 0.7943, "step": 5531 }, { "epoch": 0.8, "grad_norm": 5.204433917999268, "learning_rate": 1.73538528654467e-06, "loss": 0.7134, "step": 5532 }, { "epoch": 0.8, "grad_norm": 5.559885025024414, "learning_rate": 1.7352780172643555e-06, "loss": 0.6997, "step": 5533 }, { "epoch": 0.8, "grad_norm": 6.1709699630737305, "learning_rate": 1.7351707295628449e-06, "loss": 0.7048, "step": 5534 }, { "epoch": 0.8, "grad_norm": 5.714541435241699, "learning_rate": 1.7350634234428258e-06, "loss": 0.8168, "step": 5535 }, { "epoch": 0.8, "grad_norm": 5.389039993286133, "learning_rate": 1.7349560989069873e-06, "loss": 0.7302, "step": 5536 }, { "epoch": 0.8, "grad_norm": 6.202449798583984, "learning_rate": 1.7348487559580178e-06, "loss": 0.7096, "step": 5537 }, { "epoch": 0.8, "grad_norm": 5.825838088989258, "learning_rate": 1.7347413945986068e-06, "loss": 0.7263, "step": 5538 }, { "epoch": 0.8, "grad_norm": 6.345028877258301, "learning_rate": 1.7346340148314437e-06, "loss": 0.7479, "step": 5539 }, { "epoch": 0.8, "grad_norm": 5.872860908508301, "learning_rate": 1.7345266166592193e-06, "loss": 0.7766, "step": 5540 }, { "epoch": 0.8, "grad_norm": 5.609152317047119, "learning_rate": 1.7344192000846244e-06, "loss": 0.6838, "step": 5541 }, { "epoch": 0.8, "grad_norm": 5.184423446655273, "learning_rate": 1.7343117651103492e-06, "loss": 0.7838, "step": 5542 }, { "epoch": 0.8, "grad_norm": 5.116192817687988, "learning_rate": 1.734204311739086e-06, "loss": 0.7012, "step": 5543 }, { "epoch": 0.8, "grad_norm": 5.009350299835205, "learning_rate": 1.7340968399735264e-06, "loss": 0.6254, "step": 5544 }, { "epoch": 0.8, "grad_norm": 5.549560070037842, "learning_rate": 1.7339893498163636e-06, "loss": 0.6671, "step": 5545 }, { "epoch": 0.8, "grad_norm": 5.615821838378906, "learning_rate": 1.7338818412702905e-06, "loss": 0.7318, "step": 5546 }, { "epoch": 0.8, "grad_norm": 5.362834453582764, "learning_rate": 1.7337743143379998e-06, "loss": 0.723, "step": 5547 }, { "epoch": 0.8, "grad_norm": 5.427037715911865, "learning_rate": 1.7336667690221862e-06, "loss": 0.7384, "step": 5548 }, { "epoch": 0.81, "grad_norm": 5.798223495483398, "learning_rate": 1.7335592053255437e-06, "loss": 0.7914, "step": 5549 }, { "epoch": 0.81, "grad_norm": 5.315761566162109, "learning_rate": 1.733451623250767e-06, "loss": 0.6454, "step": 5550 }, { "epoch": 0.81, "grad_norm": 5.799628257751465, "learning_rate": 1.7333440228005518e-06, "loss": 0.6085, "step": 5551 }, { "epoch": 0.81, "grad_norm": 5.27633810043335, "learning_rate": 1.7332364039775938e-06, "loss": 0.6517, "step": 5552 }, { "epoch": 0.81, "grad_norm": 5.461061954498291, "learning_rate": 1.7331287667845892e-06, "loss": 0.6634, "step": 5553 }, { "epoch": 0.81, "grad_norm": 5.191344261169434, "learning_rate": 1.7330211112242342e-06, "loss": 0.6885, "step": 5554 }, { "epoch": 0.81, "grad_norm": 6.862148284912109, "learning_rate": 1.7329134372992264e-06, "loss": 0.7853, "step": 5555 }, { "epoch": 0.81, "grad_norm": 6.21980094909668, "learning_rate": 1.7328057450122633e-06, "loss": 0.7109, "step": 5556 }, { "epoch": 0.81, "grad_norm": 6.167130470275879, "learning_rate": 1.732698034366043e-06, "loss": 0.7176, "step": 5557 }, { "epoch": 0.81, "grad_norm": 5.454816818237305, "learning_rate": 1.732590305363264e-06, "loss": 0.7504, "step": 5558 }, { "epoch": 0.81, "grad_norm": 5.361830711364746, "learning_rate": 1.7324825580066248e-06, "loss": 0.6605, "step": 5559 }, { "epoch": 0.81, "grad_norm": 6.121615886688232, "learning_rate": 1.732374792298826e-06, "loss": 0.742, "step": 5560 }, { "epoch": 0.81, "grad_norm": 5.508793354034424, "learning_rate": 1.7322670082425664e-06, "loss": 0.6873, "step": 5561 }, { "epoch": 0.81, "grad_norm": 5.35608434677124, "learning_rate": 1.7321592058405466e-06, "loss": 0.7069, "step": 5562 }, { "epoch": 0.81, "grad_norm": 5.397994518280029, "learning_rate": 1.7320513850954677e-06, "loss": 0.7189, "step": 5563 }, { "epoch": 0.81, "grad_norm": 5.673569202423096, "learning_rate": 1.7319435460100307e-06, "loss": 0.7792, "step": 5564 }, { "epoch": 0.81, "grad_norm": 5.5031867027282715, "learning_rate": 1.7318356885869375e-06, "loss": 0.7476, "step": 5565 }, { "epoch": 0.81, "grad_norm": 6.008081436157227, "learning_rate": 1.73172781282889e-06, "loss": 0.7866, "step": 5566 }, { "epoch": 0.81, "grad_norm": 5.074884414672852, "learning_rate": 1.7316199187385915e-06, "loss": 0.6758, "step": 5567 }, { "epoch": 0.81, "grad_norm": 5.078564643859863, "learning_rate": 1.7315120063187444e-06, "loss": 0.686, "step": 5568 }, { "epoch": 0.81, "grad_norm": 6.0009355545043945, "learning_rate": 1.7314040755720525e-06, "loss": 0.661, "step": 5569 }, { "epoch": 0.81, "grad_norm": 5.657193183898926, "learning_rate": 1.73129612650122e-06, "loss": 0.7197, "step": 5570 }, { "epoch": 0.81, "grad_norm": 5.522998809814453, "learning_rate": 1.7311881591089515e-06, "loss": 0.8237, "step": 5571 }, { "epoch": 0.81, "grad_norm": 5.2652177810668945, "learning_rate": 1.7310801733979513e-06, "loss": 0.7082, "step": 5572 }, { "epoch": 0.81, "grad_norm": 5.344839572906494, "learning_rate": 1.7309721693709257e-06, "loss": 0.65, "step": 5573 }, { "epoch": 0.81, "grad_norm": 5.501291751861572, "learning_rate": 1.73086414703058e-06, "loss": 0.721, "step": 5574 }, { "epoch": 0.81, "grad_norm": 5.578754425048828, "learning_rate": 1.7307561063796203e-06, "loss": 0.694, "step": 5575 }, { "epoch": 0.81, "grad_norm": 5.929275989532471, "learning_rate": 1.7306480474207538e-06, "loss": 0.7142, "step": 5576 }, { "epoch": 0.81, "grad_norm": 5.430032253265381, "learning_rate": 1.730539970156688e-06, "loss": 0.7151, "step": 5577 }, { "epoch": 0.81, "grad_norm": 5.994165420532227, "learning_rate": 1.7304318745901297e-06, "loss": 0.7519, "step": 5578 }, { "epoch": 0.81, "grad_norm": 5.875890731811523, "learning_rate": 1.7303237607237882e-06, "loss": 0.7078, "step": 5579 }, { "epoch": 0.81, "grad_norm": 5.761740207672119, "learning_rate": 1.7302156285603713e-06, "loss": 0.7381, "step": 5580 }, { "epoch": 0.81, "grad_norm": 5.9032816886901855, "learning_rate": 1.7301074781025882e-06, "loss": 0.6102, "step": 5581 }, { "epoch": 0.81, "grad_norm": 5.830493927001953, "learning_rate": 1.7299993093531487e-06, "loss": 0.7679, "step": 5582 }, { "epoch": 0.81, "grad_norm": 4.959179401397705, "learning_rate": 1.7298911223147625e-06, "loss": 0.6279, "step": 5583 }, { "epoch": 0.81, "grad_norm": 5.483879566192627, "learning_rate": 1.72978291699014e-06, "loss": 0.7054, "step": 5584 }, { "epoch": 0.81, "grad_norm": 5.358422756195068, "learning_rate": 1.7296746933819927e-06, "loss": 0.6552, "step": 5585 }, { "epoch": 0.81, "grad_norm": 5.431930065155029, "learning_rate": 1.7295664514930313e-06, "loss": 0.6938, "step": 5586 }, { "epoch": 0.81, "grad_norm": 5.307535171508789, "learning_rate": 1.729458191325968e-06, "loss": 0.6737, "step": 5587 }, { "epoch": 0.81, "grad_norm": 5.832796573638916, "learning_rate": 1.7293499128835148e-06, "loss": 0.7878, "step": 5588 }, { "epoch": 0.81, "grad_norm": 5.6168293952941895, "learning_rate": 1.7292416161683847e-06, "loss": 0.6752, "step": 5589 }, { "epoch": 0.81, "grad_norm": 5.549707889556885, "learning_rate": 1.7291333011832906e-06, "loss": 0.7623, "step": 5590 }, { "epoch": 0.81, "grad_norm": 5.665518760681152, "learning_rate": 1.7290249679309467e-06, "loss": 0.7263, "step": 5591 }, { "epoch": 0.81, "grad_norm": 5.682153701782227, "learning_rate": 1.7289166164140666e-06, "loss": 0.7216, "step": 5592 }, { "epoch": 0.81, "grad_norm": 5.9002838134765625, "learning_rate": 1.7288082466353648e-06, "loss": 0.7397, "step": 5593 }, { "epoch": 0.81, "grad_norm": 6.097045421600342, "learning_rate": 1.7286998585975572e-06, "loss": 0.7042, "step": 5594 }, { "epoch": 0.81, "grad_norm": 6.996170997619629, "learning_rate": 1.728591452303358e-06, "loss": 0.7174, "step": 5595 }, { "epoch": 0.81, "grad_norm": 5.752130031585693, "learning_rate": 1.7284830277554844e-06, "loss": 0.7596, "step": 5596 }, { "epoch": 0.81, "grad_norm": 6.17066764831543, "learning_rate": 1.7283745849566515e-06, "loss": 0.746, "step": 5597 }, { "epoch": 0.81, "grad_norm": 5.781616687774658, "learning_rate": 1.7282661239095773e-06, "loss": 0.7234, "step": 5598 }, { "epoch": 0.81, "grad_norm": 5.625521183013916, "learning_rate": 1.7281576446169787e-06, "loss": 0.7376, "step": 5599 }, { "epoch": 0.81, "grad_norm": 4.7832255363464355, "learning_rate": 1.7280491470815734e-06, "loss": 0.6359, "step": 5600 }, { "epoch": 0.81, "grad_norm": 5.212627410888672, "learning_rate": 1.7279406313060795e-06, "loss": 0.6982, "step": 5601 }, { "epoch": 0.81, "grad_norm": 5.45220422744751, "learning_rate": 1.727832097293216e-06, "loss": 0.7058, "step": 5602 }, { "epoch": 0.81, "grad_norm": 5.3871917724609375, "learning_rate": 1.727723545045702e-06, "loss": 0.679, "step": 5603 }, { "epoch": 0.81, "grad_norm": 5.690182685852051, "learning_rate": 1.7276149745662569e-06, "loss": 0.7096, "step": 5604 }, { "epoch": 0.81, "grad_norm": 5.582427978515625, "learning_rate": 1.727506385857601e-06, "loss": 0.6893, "step": 5605 }, { "epoch": 0.81, "grad_norm": 5.470937252044678, "learning_rate": 1.7273977789224544e-06, "loss": 0.6786, "step": 5606 }, { "epoch": 0.81, "grad_norm": 5.827304840087891, "learning_rate": 1.7272891537635383e-06, "loss": 0.6797, "step": 5607 }, { "epoch": 0.81, "grad_norm": 5.567333698272705, "learning_rate": 1.7271805103835746e-06, "loss": 0.7024, "step": 5608 }, { "epoch": 0.81, "grad_norm": 5.849971771240234, "learning_rate": 1.7270718487852844e-06, "loss": 0.7107, "step": 5609 }, { "epoch": 0.81, "grad_norm": 6.06764030456543, "learning_rate": 1.7269631689713907e-06, "loss": 0.7638, "step": 5610 }, { "epoch": 0.81, "grad_norm": 5.9257354736328125, "learning_rate": 1.7268544709446155e-06, "loss": 0.7173, "step": 5611 }, { "epoch": 0.81, "grad_norm": 5.7101359367370605, "learning_rate": 1.7267457547076832e-06, "loss": 0.8224, "step": 5612 }, { "epoch": 0.81, "grad_norm": 5.011542797088623, "learning_rate": 1.7266370202633162e-06, "loss": 0.6905, "step": 5613 }, { "epoch": 0.81, "grad_norm": 6.204484939575195, "learning_rate": 1.7265282676142395e-06, "loss": 0.7592, "step": 5614 }, { "epoch": 0.81, "grad_norm": 5.970929145812988, "learning_rate": 1.7264194967631778e-06, "loss": 0.6992, "step": 5615 }, { "epoch": 0.81, "grad_norm": 6.225851058959961, "learning_rate": 1.7263107077128558e-06, "loss": 0.799, "step": 5616 }, { "epoch": 0.82, "grad_norm": 5.0499372482299805, "learning_rate": 1.7262019004659992e-06, "loss": 0.7146, "step": 5617 }, { "epoch": 0.82, "grad_norm": 5.898896217346191, "learning_rate": 1.7260930750253338e-06, "loss": 0.8132, "step": 5618 }, { "epoch": 0.82, "grad_norm": 5.658083915710449, "learning_rate": 1.725984231393586e-06, "loss": 0.697, "step": 5619 }, { "epoch": 0.82, "grad_norm": 5.875458240509033, "learning_rate": 1.7258753695734834e-06, "loss": 0.7321, "step": 5620 }, { "epoch": 0.82, "grad_norm": 5.509675979614258, "learning_rate": 1.7257664895677526e-06, "loss": 0.729, "step": 5621 }, { "epoch": 0.82, "grad_norm": 5.838710784912109, "learning_rate": 1.7256575913791216e-06, "loss": 0.6732, "step": 5622 }, { "epoch": 0.82, "grad_norm": 5.287534713745117, "learning_rate": 1.7255486750103185e-06, "loss": 0.7333, "step": 5623 }, { "epoch": 0.82, "grad_norm": 5.7581586837768555, "learning_rate": 1.7254397404640726e-06, "loss": 0.7106, "step": 5624 }, { "epoch": 0.82, "grad_norm": 6.082570552825928, "learning_rate": 1.7253307877431124e-06, "loss": 0.6221, "step": 5625 }, { "epoch": 0.82, "grad_norm": 5.970445156097412, "learning_rate": 1.7252218168501682e-06, "loss": 0.6663, "step": 5626 }, { "epoch": 0.82, "grad_norm": 5.33740234375, "learning_rate": 1.7251128277879694e-06, "loss": 0.6479, "step": 5627 }, { "epoch": 0.82, "grad_norm": 5.498241424560547, "learning_rate": 1.7250038205592472e-06, "loss": 0.6904, "step": 5628 }, { "epoch": 0.82, "grad_norm": 6.260354995727539, "learning_rate": 1.7248947951667324e-06, "loss": 0.6961, "step": 5629 }, { "epoch": 0.82, "grad_norm": 5.709351062774658, "learning_rate": 1.7247857516131558e-06, "loss": 0.6746, "step": 5630 }, { "epoch": 0.82, "grad_norm": 5.476194381713867, "learning_rate": 1.7246766899012503e-06, "loss": 0.8205, "step": 5631 }, { "epoch": 0.82, "grad_norm": 5.330048084259033, "learning_rate": 1.7245676100337477e-06, "loss": 0.6435, "step": 5632 }, { "epoch": 0.82, "grad_norm": 5.549811840057373, "learning_rate": 1.7244585120133808e-06, "loss": 0.6494, "step": 5633 }, { "epoch": 0.82, "grad_norm": 5.211930274963379, "learning_rate": 1.724349395842883e-06, "loss": 0.6647, "step": 5634 }, { "epoch": 0.82, "grad_norm": 5.427642345428467, "learning_rate": 1.7242402615249882e-06, "loss": 0.7409, "step": 5635 }, { "epoch": 0.82, "grad_norm": 6.759654521942139, "learning_rate": 1.7241311090624304e-06, "loss": 0.7825, "step": 5636 }, { "epoch": 0.82, "grad_norm": 5.9753828048706055, "learning_rate": 1.7240219384579442e-06, "loss": 0.7824, "step": 5637 }, { "epoch": 0.82, "grad_norm": 5.302438735961914, "learning_rate": 1.7239127497142647e-06, "loss": 0.7845, "step": 5638 }, { "epoch": 0.82, "grad_norm": 5.705085754394531, "learning_rate": 1.7238035428341274e-06, "loss": 0.7431, "step": 5639 }, { "epoch": 0.82, "grad_norm": 5.074592590332031, "learning_rate": 1.7236943178202683e-06, "loss": 0.6739, "step": 5640 }, { "epoch": 0.82, "grad_norm": 4.9310150146484375, "learning_rate": 1.7235850746754245e-06, "loss": 0.717, "step": 5641 }, { "epoch": 0.82, "grad_norm": 5.943291664123535, "learning_rate": 1.7234758134023317e-06, "loss": 0.7771, "step": 5642 }, { "epoch": 0.82, "grad_norm": 5.176537990570068, "learning_rate": 1.7233665340037282e-06, "loss": 0.7278, "step": 5643 }, { "epoch": 0.82, "grad_norm": 5.0736002922058105, "learning_rate": 1.7232572364823516e-06, "loss": 0.6996, "step": 5644 }, { "epoch": 0.82, "grad_norm": 6.3698625564575195, "learning_rate": 1.72314792084094e-06, "loss": 0.8485, "step": 5645 }, { "epoch": 0.82, "grad_norm": 5.889937877655029, "learning_rate": 1.7230385870822325e-06, "loss": 0.7078, "step": 5646 }, { "epoch": 0.82, "grad_norm": 5.253474712371826, "learning_rate": 1.722929235208968e-06, "loss": 0.7124, "step": 5647 }, { "epoch": 0.82, "grad_norm": 5.923790454864502, "learning_rate": 1.7228198652238856e-06, "loss": 0.6503, "step": 5648 }, { "epoch": 0.82, "grad_norm": 4.931445598602295, "learning_rate": 1.7227104771297265e-06, "loss": 0.7057, "step": 5649 }, { "epoch": 0.82, "grad_norm": 5.701398849487305, "learning_rate": 1.7226010709292304e-06, "loss": 0.6592, "step": 5650 }, { "epoch": 0.82, "grad_norm": 5.984412670135498, "learning_rate": 1.7224916466251387e-06, "loss": 0.7762, "step": 5651 }, { "epoch": 0.82, "grad_norm": 6.466370582580566, "learning_rate": 1.7223822042201928e-06, "loss": 0.7131, "step": 5652 }, { "epoch": 0.82, "grad_norm": 5.665416240692139, "learning_rate": 1.7222727437171342e-06, "loss": 0.6754, "step": 5653 }, { "epoch": 0.82, "grad_norm": 4.967039585113525, "learning_rate": 1.7221632651187058e-06, "loss": 0.6848, "step": 5654 }, { "epoch": 0.82, "grad_norm": 5.944474697113037, "learning_rate": 1.7220537684276502e-06, "loss": 0.7564, "step": 5655 }, { "epoch": 0.82, "grad_norm": 5.7920966148376465, "learning_rate": 1.7219442536467104e-06, "loss": 0.7512, "step": 5656 }, { "epoch": 0.82, "grad_norm": 5.269107818603516, "learning_rate": 1.7218347207786307e-06, "loss": 0.7231, "step": 5657 }, { "epoch": 0.82, "grad_norm": 5.605598449707031, "learning_rate": 1.7217251698261547e-06, "loss": 0.6895, "step": 5658 }, { "epoch": 0.82, "grad_norm": 5.245456695556641, "learning_rate": 1.7216156007920275e-06, "loss": 0.7068, "step": 5659 }, { "epoch": 0.82, "grad_norm": 5.891154766082764, "learning_rate": 1.7215060136789935e-06, "loss": 0.8547, "step": 5660 }, { "epoch": 0.82, "grad_norm": 5.840243339538574, "learning_rate": 1.7213964084897992e-06, "loss": 0.7363, "step": 5661 }, { "epoch": 0.82, "grad_norm": 5.70731258392334, "learning_rate": 1.7212867852271894e-06, "loss": 0.7695, "step": 5662 }, { "epoch": 0.82, "grad_norm": 6.188215732574463, "learning_rate": 1.7211771438939117e-06, "loss": 0.7949, "step": 5663 }, { "epoch": 0.82, "grad_norm": 4.9024248123168945, "learning_rate": 1.7210674844927122e-06, "loss": 0.6507, "step": 5664 }, { "epoch": 0.82, "grad_norm": 5.981057167053223, "learning_rate": 1.7209578070263386e-06, "loss": 0.7681, "step": 5665 }, { "epoch": 0.82, "grad_norm": 5.729227542877197, "learning_rate": 1.7208481114975383e-06, "loss": 0.7609, "step": 5666 }, { "epoch": 0.82, "grad_norm": 5.481531620025635, "learning_rate": 1.7207383979090603e-06, "loss": 0.6422, "step": 5667 }, { "epoch": 0.82, "grad_norm": 5.495312690734863, "learning_rate": 1.7206286662636527e-06, "loss": 0.6483, "step": 5668 }, { "epoch": 0.82, "grad_norm": 5.669281005859375, "learning_rate": 1.7205189165640643e-06, "loss": 0.6873, "step": 5669 }, { "epoch": 0.82, "grad_norm": 5.59031343460083, "learning_rate": 1.7204091488130458e-06, "loss": 0.7624, "step": 5670 }, { "epoch": 0.82, "grad_norm": 5.0268025398254395, "learning_rate": 1.7202993630133463e-06, "loss": 0.6914, "step": 5671 }, { "epoch": 0.82, "grad_norm": 5.3411431312561035, "learning_rate": 1.7201895591677165e-06, "loss": 0.6957, "step": 5672 }, { "epoch": 0.82, "grad_norm": 6.016159534454346, "learning_rate": 1.7200797372789075e-06, "loss": 0.749, "step": 5673 }, { "epoch": 0.82, "grad_norm": 6.294692516326904, "learning_rate": 1.7199698973496709e-06, "loss": 0.7906, "step": 5674 }, { "epoch": 0.82, "grad_norm": 5.424446105957031, "learning_rate": 1.7198600393827583e-06, "loss": 0.7386, "step": 5675 }, { "epoch": 0.82, "grad_norm": 5.633044719696045, "learning_rate": 1.719750163380922e-06, "loss": 0.6712, "step": 5676 }, { "epoch": 0.82, "grad_norm": 5.29971981048584, "learning_rate": 1.7196402693469147e-06, "loss": 0.6786, "step": 5677 }, { "epoch": 0.82, "grad_norm": 5.73423957824707, "learning_rate": 1.71953035728349e-06, "loss": 0.5913, "step": 5678 }, { "epoch": 0.82, "grad_norm": 5.748012065887451, "learning_rate": 1.7194204271934012e-06, "loss": 0.6694, "step": 5679 }, { "epoch": 0.82, "grad_norm": 5.596510410308838, "learning_rate": 1.7193104790794023e-06, "loss": 0.6671, "step": 5680 }, { "epoch": 0.82, "grad_norm": 5.674532890319824, "learning_rate": 1.7192005129442486e-06, "loss": 0.7303, "step": 5681 }, { "epoch": 0.82, "grad_norm": 5.893429756164551, "learning_rate": 1.7190905287906942e-06, "loss": 0.6941, "step": 5682 }, { "epoch": 0.82, "grad_norm": 5.5325541496276855, "learning_rate": 1.7189805266214954e-06, "loss": 0.7911, "step": 5683 }, { "epoch": 0.82, "grad_norm": 5.35137939453125, "learning_rate": 1.7188705064394074e-06, "loss": 0.6449, "step": 5684 }, { "epoch": 0.82, "grad_norm": 5.1833953857421875, "learning_rate": 1.718760468247187e-06, "loss": 0.6459, "step": 5685 }, { "epoch": 0.83, "grad_norm": 5.131239891052246, "learning_rate": 1.7186504120475908e-06, "loss": 0.7726, "step": 5686 }, { "epoch": 0.83, "grad_norm": 6.151742935180664, "learning_rate": 1.7185403378433763e-06, "loss": 0.8004, "step": 5687 }, { "epoch": 0.83, "grad_norm": 5.36899995803833, "learning_rate": 1.7184302456373013e-06, "loss": 0.7203, "step": 5688 }, { "epoch": 0.83, "grad_norm": 5.4854044914245605, "learning_rate": 1.7183201354321238e-06, "loss": 0.7567, "step": 5689 }, { "epoch": 0.83, "grad_norm": 4.868624687194824, "learning_rate": 1.7182100072306023e-06, "loss": 0.6423, "step": 5690 }, { "epoch": 0.83, "grad_norm": 5.182790756225586, "learning_rate": 1.718099861035496e-06, "loss": 0.6764, "step": 5691 }, { "epoch": 0.83, "grad_norm": 5.742170333862305, "learning_rate": 1.7179896968495647e-06, "loss": 0.7448, "step": 5692 }, { "epoch": 0.83, "grad_norm": 5.689289093017578, "learning_rate": 1.7178795146755678e-06, "loss": 0.7074, "step": 5693 }, { "epoch": 0.83, "grad_norm": 6.169633865356445, "learning_rate": 1.7177693145162663e-06, "loss": 0.6876, "step": 5694 }, { "epoch": 0.83, "grad_norm": 5.154599189758301, "learning_rate": 1.7176590963744211e-06, "loss": 0.6675, "step": 5695 }, { "epoch": 0.83, "grad_norm": 5.055568695068359, "learning_rate": 1.7175488602527928e-06, "loss": 0.6038, "step": 5696 }, { "epoch": 0.83, "grad_norm": 5.317521572113037, "learning_rate": 1.7174386061541443e-06, "loss": 0.664, "step": 5697 }, { "epoch": 0.83, "grad_norm": 5.809977054595947, "learning_rate": 1.7173283340812367e-06, "loss": 0.7269, "step": 5698 }, { "epoch": 0.83, "grad_norm": 5.797369956970215, "learning_rate": 1.7172180440368335e-06, "loss": 0.6407, "step": 5699 }, { "epoch": 0.83, "grad_norm": 5.612239360809326, "learning_rate": 1.7171077360236974e-06, "loss": 0.6909, "step": 5700 }, { "epoch": 0.83, "grad_norm": 6.082217693328857, "learning_rate": 1.7169974100445924e-06, "loss": 0.7495, "step": 5701 }, { "epoch": 0.83, "grad_norm": 5.245639801025391, "learning_rate": 1.716887066102282e-06, "loss": 0.7052, "step": 5702 }, { "epoch": 0.83, "grad_norm": 5.487147331237793, "learning_rate": 1.7167767041995313e-06, "loss": 0.6605, "step": 5703 }, { "epoch": 0.83, "grad_norm": 5.680065631866455, "learning_rate": 1.7166663243391048e-06, "loss": 0.7147, "step": 5704 }, { "epoch": 0.83, "grad_norm": 5.700134754180908, "learning_rate": 1.716555926523768e-06, "loss": 0.6678, "step": 5705 }, { "epoch": 0.83, "grad_norm": 5.376965045928955, "learning_rate": 1.7164455107562868e-06, "loss": 0.7315, "step": 5706 }, { "epoch": 0.83, "grad_norm": 5.694920063018799, "learning_rate": 1.7163350770394271e-06, "loss": 0.7373, "step": 5707 }, { "epoch": 0.83, "grad_norm": 5.568963050842285, "learning_rate": 1.7162246253759563e-06, "loss": 0.704, "step": 5708 }, { "epoch": 0.83, "grad_norm": 5.4835662841796875, "learning_rate": 1.716114155768641e-06, "loss": 0.7125, "step": 5709 }, { "epoch": 0.83, "grad_norm": 5.478121757507324, "learning_rate": 1.7160036682202495e-06, "loss": 0.6999, "step": 5710 }, { "epoch": 0.83, "grad_norm": 5.713088035583496, "learning_rate": 1.715893162733549e-06, "loss": 0.7024, "step": 5711 }, { "epoch": 0.83, "grad_norm": 5.571751594543457, "learning_rate": 1.715782639311309e-06, "loss": 0.7739, "step": 5712 }, { "epoch": 0.83, "grad_norm": 5.779954433441162, "learning_rate": 1.7156720979562975e-06, "loss": 0.6447, "step": 5713 }, { "epoch": 0.83, "grad_norm": 5.528238296508789, "learning_rate": 1.7155615386712848e-06, "loss": 0.7321, "step": 5714 }, { "epoch": 0.83, "grad_norm": 5.493594169616699, "learning_rate": 1.71545096145904e-06, "loss": 0.7069, "step": 5715 }, { "epoch": 0.83, "grad_norm": 5.345720291137695, "learning_rate": 1.7153403663223344e-06, "loss": 0.6035, "step": 5716 }, { "epoch": 0.83, "grad_norm": 5.498055934906006, "learning_rate": 1.7152297532639377e-06, "loss": 0.7951, "step": 5717 }, { "epoch": 0.83, "grad_norm": 5.465034484863281, "learning_rate": 1.7151191222866222e-06, "loss": 0.6417, "step": 5718 }, { "epoch": 0.83, "grad_norm": 6.387982368469238, "learning_rate": 1.7150084733931584e-06, "loss": 0.7007, "step": 5719 }, { "epoch": 0.83, "grad_norm": 5.613661766052246, "learning_rate": 1.7148978065863196e-06, "loss": 0.6763, "step": 5720 }, { "epoch": 0.83, "grad_norm": 6.781586647033691, "learning_rate": 1.7147871218688776e-06, "loss": 0.7248, "step": 5721 }, { "epoch": 0.83, "grad_norm": 5.512853145599365, "learning_rate": 1.7146764192436058e-06, "loss": 0.7452, "step": 5722 }, { "epoch": 0.83, "grad_norm": 5.306417942047119, "learning_rate": 1.7145656987132773e-06, "loss": 0.7242, "step": 5723 }, { "epoch": 0.83, "grad_norm": 5.536375045776367, "learning_rate": 1.7144549602806664e-06, "loss": 0.6868, "step": 5724 }, { "epoch": 0.83, "grad_norm": 5.547291278839111, "learning_rate": 1.7143442039485475e-06, "loss": 0.6911, "step": 5725 }, { "epoch": 0.83, "grad_norm": 5.81825065612793, "learning_rate": 1.7142334297196952e-06, "loss": 0.7188, "step": 5726 }, { "epoch": 0.83, "grad_norm": 5.461364269256592, "learning_rate": 1.7141226375968847e-06, "loss": 0.6609, "step": 5727 }, { "epoch": 0.83, "grad_norm": 5.748346328735352, "learning_rate": 1.714011827582892e-06, "loss": 0.6694, "step": 5728 }, { "epoch": 0.83, "grad_norm": 5.56567907333374, "learning_rate": 1.7139009996804926e-06, "loss": 0.7201, "step": 5729 }, { "epoch": 0.83, "grad_norm": 6.453197956085205, "learning_rate": 1.7137901538924644e-06, "loss": 0.7644, "step": 5730 }, { "epoch": 0.83, "grad_norm": 5.564727306365967, "learning_rate": 1.713679290221583e-06, "loss": 0.6242, "step": 5731 }, { "epoch": 0.83, "grad_norm": 5.782577991485596, "learning_rate": 1.713568408670627e-06, "loss": 0.6954, "step": 5732 }, { "epoch": 0.83, "grad_norm": 5.700122833251953, "learning_rate": 1.713457509242374e-06, "loss": 0.7559, "step": 5733 }, { "epoch": 0.83, "grad_norm": 5.46115779876709, "learning_rate": 1.713346591939602e-06, "loss": 0.8121, "step": 5734 }, { "epoch": 0.83, "grad_norm": 5.923142433166504, "learning_rate": 1.7132356567650903e-06, "loss": 0.671, "step": 5735 }, { "epoch": 0.83, "grad_norm": 5.839783191680908, "learning_rate": 1.7131247037216184e-06, "loss": 0.6707, "step": 5736 }, { "epoch": 0.83, "grad_norm": 5.471228122711182, "learning_rate": 1.7130137328119656e-06, "loss": 0.712, "step": 5737 }, { "epoch": 0.83, "grad_norm": 5.242403984069824, "learning_rate": 1.7129027440389122e-06, "loss": 0.7323, "step": 5738 }, { "epoch": 0.83, "grad_norm": 5.166909217834473, "learning_rate": 1.7127917374052387e-06, "loss": 0.6988, "step": 5739 }, { "epoch": 0.83, "grad_norm": 5.682516574859619, "learning_rate": 1.7126807129137269e-06, "loss": 0.6761, "step": 5740 }, { "epoch": 0.83, "grad_norm": 5.213180065155029, "learning_rate": 1.7125696705671574e-06, "loss": 0.6856, "step": 5741 }, { "epoch": 0.83, "grad_norm": 5.374606132507324, "learning_rate": 1.7124586103683125e-06, "loss": 0.7155, "step": 5742 }, { "epoch": 0.83, "grad_norm": 5.980424880981445, "learning_rate": 1.712347532319975e-06, "loss": 0.7338, "step": 5743 }, { "epoch": 0.83, "grad_norm": 5.348902702331543, "learning_rate": 1.7122364364249276e-06, "loss": 0.7181, "step": 5744 }, { "epoch": 0.83, "grad_norm": 6.008825778961182, "learning_rate": 1.7121253226859534e-06, "loss": 0.6341, "step": 5745 }, { "epoch": 0.83, "grad_norm": 5.728278160095215, "learning_rate": 1.7120141911058363e-06, "loss": 0.7626, "step": 5746 }, { "epoch": 0.83, "grad_norm": 5.718256950378418, "learning_rate": 1.7119030416873605e-06, "loss": 0.6445, "step": 5747 }, { "epoch": 0.83, "grad_norm": 5.670624732971191, "learning_rate": 1.711791874433311e-06, "loss": 0.6859, "step": 5748 }, { "epoch": 0.83, "grad_norm": 5.910782814025879, "learning_rate": 1.711680689346472e-06, "loss": 0.7169, "step": 5749 }, { "epoch": 0.83, "grad_norm": 5.556833267211914, "learning_rate": 1.7115694864296307e-06, "loss": 0.7357, "step": 5750 }, { "epoch": 0.83, "grad_norm": 5.2545952796936035, "learning_rate": 1.7114582656855713e-06, "loss": 0.7029, "step": 5751 }, { "epoch": 0.83, "grad_norm": 4.978628158569336, "learning_rate": 1.7113470271170814e-06, "loss": 0.5921, "step": 5752 }, { "epoch": 0.83, "grad_norm": 5.644101142883301, "learning_rate": 1.7112357707269474e-06, "loss": 0.7174, "step": 5753 }, { "epoch": 0.83, "grad_norm": 5.4518303871154785, "learning_rate": 1.7111244965179568e-06, "loss": 0.7281, "step": 5754 }, { "epoch": 0.84, "grad_norm": 5.65669584274292, "learning_rate": 1.7110132044928972e-06, "loss": 0.6884, "step": 5755 }, { "epoch": 0.84, "grad_norm": 5.590267181396484, "learning_rate": 1.7109018946545573e-06, "loss": 0.7595, "step": 5756 }, { "epoch": 0.84, "grad_norm": 5.2337517738342285, "learning_rate": 1.7107905670057255e-06, "loss": 0.6465, "step": 5757 }, { "epoch": 0.84, "grad_norm": 5.21783447265625, "learning_rate": 1.7106792215491909e-06, "loss": 0.6155, "step": 5758 }, { "epoch": 0.84, "grad_norm": 5.193148136138916, "learning_rate": 1.7105678582877433e-06, "loss": 0.6963, "step": 5759 }, { "epoch": 0.84, "grad_norm": 6.5837321281433105, "learning_rate": 1.7104564772241723e-06, "loss": 0.6568, "step": 5760 }, { "epoch": 0.84, "grad_norm": 5.345496654510498, "learning_rate": 1.7103450783612687e-06, "loss": 0.6902, "step": 5761 }, { "epoch": 0.84, "grad_norm": 5.303006649017334, "learning_rate": 1.7102336617018233e-06, "loss": 0.6745, "step": 5762 }, { "epoch": 0.84, "grad_norm": 5.081277370452881, "learning_rate": 1.7101222272486277e-06, "loss": 0.6996, "step": 5763 }, { "epoch": 0.84, "grad_norm": 5.494955062866211, "learning_rate": 1.7100107750044733e-06, "loss": 0.6635, "step": 5764 }, { "epoch": 0.84, "grad_norm": 5.641407489776611, "learning_rate": 1.7098993049721528e-06, "loss": 0.6874, "step": 5765 }, { "epoch": 0.84, "grad_norm": 5.531400680541992, "learning_rate": 1.7097878171544587e-06, "loss": 0.6599, "step": 5766 }, { "epoch": 0.84, "grad_norm": 5.52615213394165, "learning_rate": 1.709676311554184e-06, "loss": 0.7225, "step": 5767 }, { "epoch": 0.84, "grad_norm": 5.118847846984863, "learning_rate": 1.7095647881741227e-06, "loss": 0.6401, "step": 5768 }, { "epoch": 0.84, "grad_norm": 5.285511493682861, "learning_rate": 1.7094532470170682e-06, "loss": 0.7193, "step": 5769 }, { "epoch": 0.84, "grad_norm": 5.672338485717773, "learning_rate": 1.7093416880858154e-06, "loss": 0.7124, "step": 5770 }, { "epoch": 0.84, "grad_norm": 6.005270004272461, "learning_rate": 1.709230111383159e-06, "loss": 0.7135, "step": 5771 }, { "epoch": 0.84, "grad_norm": 4.903735637664795, "learning_rate": 1.7091185169118948e-06, "loss": 0.683, "step": 5772 }, { "epoch": 0.84, "grad_norm": 5.722630977630615, "learning_rate": 1.7090069046748184e-06, "loss": 0.7108, "step": 5773 }, { "epoch": 0.84, "grad_norm": 5.91562032699585, "learning_rate": 1.7088952746747258e-06, "loss": 0.6764, "step": 5774 }, { "epoch": 0.84, "grad_norm": 6.258601665496826, "learning_rate": 1.708783626914414e-06, "loss": 0.6615, "step": 5775 }, { "epoch": 0.84, "grad_norm": 5.866129398345947, "learning_rate": 1.7086719613966802e-06, "loss": 0.7646, "step": 5776 }, { "epoch": 0.84, "grad_norm": 5.153557777404785, "learning_rate": 1.7085602781243217e-06, "loss": 0.6805, "step": 5777 }, { "epoch": 0.84, "grad_norm": 5.422337055206299, "learning_rate": 1.708448577100137e-06, "loss": 0.6684, "step": 5778 }, { "epoch": 0.84, "grad_norm": 5.454303741455078, "learning_rate": 1.7083368583269241e-06, "loss": 0.591, "step": 5779 }, { "epoch": 0.84, "grad_norm": 5.829459190368652, "learning_rate": 1.7082251218074822e-06, "loss": 0.6028, "step": 5780 }, { "epoch": 0.84, "grad_norm": 5.606133460998535, "learning_rate": 1.7081133675446107e-06, "loss": 0.7073, "step": 5781 }, { "epoch": 0.84, "grad_norm": 5.915542125701904, "learning_rate": 1.7080015955411092e-06, "loss": 0.718, "step": 5782 }, { "epoch": 0.84, "grad_norm": 6.190553188323975, "learning_rate": 1.7078898057997782e-06, "loss": 0.7248, "step": 5783 }, { "epoch": 0.84, "grad_norm": 5.187803745269775, "learning_rate": 1.707777998323418e-06, "loss": 0.6513, "step": 5784 }, { "epoch": 0.84, "grad_norm": 5.703601837158203, "learning_rate": 1.7076661731148303e-06, "loss": 0.6127, "step": 5785 }, { "epoch": 0.84, "grad_norm": 5.461270332336426, "learning_rate": 1.7075543301768165e-06, "loss": 0.7726, "step": 5786 }, { "epoch": 0.84, "grad_norm": 5.911987781524658, "learning_rate": 1.7074424695121788e-06, "loss": 0.8137, "step": 5787 }, { "epoch": 0.84, "grad_norm": 5.902593612670898, "learning_rate": 1.707330591123719e-06, "loss": 0.7107, "step": 5788 }, { "epoch": 0.84, "grad_norm": 5.168912887573242, "learning_rate": 1.7072186950142408e-06, "loss": 0.6773, "step": 5789 }, { "epoch": 0.84, "grad_norm": 5.971529006958008, "learning_rate": 1.7071067811865474e-06, "loss": 0.7926, "step": 5790 }, { "epoch": 0.84, "grad_norm": 5.789879322052002, "learning_rate": 1.7069948496434426e-06, "loss": 0.6489, "step": 5791 }, { "epoch": 0.84, "grad_norm": 5.8638505935668945, "learning_rate": 1.7068829003877303e-06, "loss": 0.7509, "step": 5792 }, { "epoch": 0.84, "grad_norm": 5.437698841094971, "learning_rate": 1.7067709334222159e-06, "loss": 0.747, "step": 5793 }, { "epoch": 0.84, "grad_norm": 5.633507251739502, "learning_rate": 1.7066589487497036e-06, "loss": 0.6733, "step": 5794 }, { "epoch": 0.84, "grad_norm": 5.923013687133789, "learning_rate": 1.7065469463729998e-06, "loss": 0.7335, "step": 5795 }, { "epoch": 0.84, "grad_norm": 5.467535495758057, "learning_rate": 1.7064349262949106e-06, "loss": 0.7352, "step": 5796 }, { "epoch": 0.84, "grad_norm": 6.447782516479492, "learning_rate": 1.7063228885182417e-06, "loss": 0.6923, "step": 5797 }, { "epoch": 0.84, "grad_norm": 6.216842174530029, "learning_rate": 1.7062108330458006e-06, "loss": 0.6386, "step": 5798 }, { "epoch": 0.84, "grad_norm": 5.697006702423096, "learning_rate": 1.7060987598803947e-06, "loss": 0.7094, "step": 5799 }, { "epoch": 0.84, "grad_norm": 6.197235107421875, "learning_rate": 1.7059866690248317e-06, "loss": 0.6417, "step": 5800 }, { "epoch": 0.84, "grad_norm": 5.803918361663818, "learning_rate": 1.7058745604819195e-06, "loss": 0.7286, "step": 5801 }, { "epoch": 0.84, "grad_norm": 5.8733086585998535, "learning_rate": 1.7057624342544675e-06, "loss": 0.7381, "step": 5802 }, { "epoch": 0.84, "grad_norm": 4.938147068023682, "learning_rate": 1.705650290345284e-06, "loss": 0.7426, "step": 5803 }, { "epoch": 0.84, "grad_norm": 5.75097131729126, "learning_rate": 1.7055381287571793e-06, "loss": 0.7427, "step": 5804 }, { "epoch": 0.84, "grad_norm": 5.616538047790527, "learning_rate": 1.7054259494929628e-06, "loss": 0.7057, "step": 5805 }, { "epoch": 0.84, "grad_norm": 5.495739936828613, "learning_rate": 1.7053137525554458e-06, "loss": 0.6978, "step": 5806 }, { "epoch": 0.84, "grad_norm": 6.102136611938477, "learning_rate": 1.7052015379474387e-06, "loss": 0.7454, "step": 5807 }, { "epoch": 0.84, "grad_norm": 5.648647308349609, "learning_rate": 1.7050893056717529e-06, "loss": 0.7075, "step": 5808 }, { "epoch": 0.84, "grad_norm": 5.674544334411621, "learning_rate": 1.7049770557312e-06, "loss": 0.7068, "step": 5809 }, { "epoch": 0.84, "grad_norm": 5.774613380432129, "learning_rate": 1.7048647881285927e-06, "loss": 0.7887, "step": 5810 }, { "epoch": 0.84, "grad_norm": 6.479659080505371, "learning_rate": 1.704752502866743e-06, "loss": 0.7933, "step": 5811 }, { "epoch": 0.84, "grad_norm": 5.613788604736328, "learning_rate": 1.7046401999484647e-06, "loss": 0.7692, "step": 5812 }, { "epoch": 0.84, "grad_norm": 5.692448616027832, "learning_rate": 1.7045278793765711e-06, "loss": 0.7156, "step": 5813 }, { "epoch": 0.84, "grad_norm": 5.332667350769043, "learning_rate": 1.7044155411538763e-06, "loss": 0.7344, "step": 5814 }, { "epoch": 0.84, "grad_norm": 5.568146228790283, "learning_rate": 1.7043031852831946e-06, "loss": 0.746, "step": 5815 }, { "epoch": 0.84, "grad_norm": 5.929332733154297, "learning_rate": 1.7041908117673409e-06, "loss": 0.6591, "step": 5816 }, { "epoch": 0.84, "grad_norm": 5.2741007804870605, "learning_rate": 1.704078420609131e-06, "loss": 0.6105, "step": 5817 }, { "epoch": 0.84, "grad_norm": 6.4404120445251465, "learning_rate": 1.7039660118113796e-06, "loss": 0.7657, "step": 5818 }, { "epoch": 0.84, "grad_norm": 5.479064464569092, "learning_rate": 1.7038535853769042e-06, "loss": 0.7469, "step": 5819 }, { "epoch": 0.84, "grad_norm": 5.461103916168213, "learning_rate": 1.7037411413085205e-06, "loss": 0.746, "step": 5820 }, { "epoch": 0.84, "grad_norm": 5.352972030639648, "learning_rate": 1.703628679609046e-06, "loss": 0.7448, "step": 5821 }, { "epoch": 0.84, "grad_norm": 5.920674800872803, "learning_rate": 1.7035162002812983e-06, "loss": 0.6984, "step": 5822 }, { "epoch": 0.84, "grad_norm": 5.668343544006348, "learning_rate": 1.7034037033280955e-06, "loss": 0.6534, "step": 5823 }, { "epoch": 0.85, "grad_norm": 5.352575302124023, "learning_rate": 1.7032911887522555e-06, "loss": 0.6363, "step": 5824 }, { "epoch": 0.85, "grad_norm": 5.4980010986328125, "learning_rate": 1.7031786565565976e-06, "loss": 0.7359, "step": 5825 }, { "epoch": 0.85, "grad_norm": 5.1890034675598145, "learning_rate": 1.703066106743941e-06, "loss": 0.6879, "step": 5826 }, { "epoch": 0.85, "grad_norm": 6.076160430908203, "learning_rate": 1.7029535393171056e-06, "loss": 0.6941, "step": 5827 }, { "epoch": 0.85, "grad_norm": 5.460625648498535, "learning_rate": 1.7028409542789112e-06, "loss": 0.7061, "step": 5828 }, { "epoch": 0.85, "grad_norm": 5.092258453369141, "learning_rate": 1.7027283516321789e-06, "loss": 0.6436, "step": 5829 }, { "epoch": 0.85, "grad_norm": 6.393161296844482, "learning_rate": 1.7026157313797293e-06, "loss": 0.7189, "step": 5830 }, { "epoch": 0.85, "grad_norm": 6.254638671875, "learning_rate": 1.7025030935243842e-06, "loss": 0.6978, "step": 5831 }, { "epoch": 0.85, "grad_norm": 5.7836079597473145, "learning_rate": 1.7023904380689657e-06, "loss": 0.6699, "step": 5832 }, { "epoch": 0.85, "grad_norm": 5.911808013916016, "learning_rate": 1.702277765016296e-06, "loss": 0.6773, "step": 5833 }, { "epoch": 0.85, "grad_norm": 5.1413187980651855, "learning_rate": 1.7021650743691983e-06, "loss": 0.7047, "step": 5834 }, { "epoch": 0.85, "grad_norm": 5.283314228057861, "learning_rate": 1.702052366130495e-06, "loss": 0.7066, "step": 5835 }, { "epoch": 0.85, "grad_norm": 5.3394317626953125, "learning_rate": 1.7019396403030105e-06, "loss": 0.6892, "step": 5836 }, { "epoch": 0.85, "grad_norm": 5.39585542678833, "learning_rate": 1.7018268968895694e-06, "loss": 0.6852, "step": 5837 }, { "epoch": 0.85, "grad_norm": 5.433244228363037, "learning_rate": 1.701714135892995e-06, "loss": 0.6338, "step": 5838 }, { "epoch": 0.85, "grad_norm": 5.469672203063965, "learning_rate": 1.7016013573161134e-06, "loss": 0.7739, "step": 5839 }, { "epoch": 0.85, "grad_norm": 5.65163516998291, "learning_rate": 1.7014885611617497e-06, "loss": 0.6659, "step": 5840 }, { "epoch": 0.85, "grad_norm": 4.9213714599609375, "learning_rate": 1.7013757474327302e-06, "loss": 0.7033, "step": 5841 }, { "epoch": 0.85, "grad_norm": 5.01348876953125, "learning_rate": 1.7012629161318808e-06, "loss": 0.6451, "step": 5842 }, { "epoch": 0.85, "grad_norm": 6.66959810256958, "learning_rate": 1.7011500672620285e-06, "loss": 0.7819, "step": 5843 }, { "epoch": 0.85, "grad_norm": 6.402530193328857, "learning_rate": 1.7010372008260004e-06, "loss": 0.7651, "step": 5844 }, { "epoch": 0.85, "grad_norm": 5.915642738342285, "learning_rate": 1.7009243168266243e-06, "loss": 0.7401, "step": 5845 }, { "epoch": 0.85, "grad_norm": 5.48899507522583, "learning_rate": 1.7008114152667282e-06, "loss": 0.721, "step": 5846 }, { "epoch": 0.85, "grad_norm": 5.7028374671936035, "learning_rate": 1.700698496149141e-06, "loss": 0.8034, "step": 5847 }, { "epoch": 0.85, "grad_norm": 5.3816728591918945, "learning_rate": 1.7005855594766916e-06, "loss": 0.7375, "step": 5848 }, { "epoch": 0.85, "grad_norm": 5.888521671295166, "learning_rate": 1.700472605252209e-06, "loss": 0.6682, "step": 5849 }, { "epoch": 0.85, "grad_norm": 5.149168014526367, "learning_rate": 1.7003596334785237e-06, "loss": 0.65, "step": 5850 }, { "epoch": 0.85, "grad_norm": 5.084324836730957, "learning_rate": 1.7002466441584655e-06, "loss": 0.6727, "step": 5851 }, { "epoch": 0.85, "grad_norm": 5.507781505584717, "learning_rate": 1.700133637294866e-06, "loss": 0.6503, "step": 5852 }, { "epoch": 0.85, "grad_norm": 5.646584510803223, "learning_rate": 1.700020612890555e-06, "loss": 0.6998, "step": 5853 }, { "epoch": 0.85, "grad_norm": 8.162293434143066, "learning_rate": 1.6999075709483654e-06, "loss": 0.7185, "step": 5854 }, { "epoch": 0.85, "grad_norm": 5.443903923034668, "learning_rate": 1.6997945114711287e-06, "loss": 0.6438, "step": 5855 }, { "epoch": 0.85, "grad_norm": 6.271603584289551, "learning_rate": 1.6996814344616776e-06, "loss": 0.6717, "step": 5856 }, { "epoch": 0.85, "grad_norm": 5.509932518005371, "learning_rate": 1.6995683399228452e-06, "loss": 0.6589, "step": 5857 }, { "epoch": 0.85, "grad_norm": 6.403256416320801, "learning_rate": 1.6994552278574645e-06, "loss": 0.6766, "step": 5858 }, { "epoch": 0.85, "grad_norm": 5.374630451202393, "learning_rate": 1.6993420982683697e-06, "loss": 0.7213, "step": 5859 }, { "epoch": 0.85, "grad_norm": 5.851114749908447, "learning_rate": 1.6992289511583948e-06, "loss": 0.6932, "step": 5860 }, { "epoch": 0.85, "grad_norm": 5.843222141265869, "learning_rate": 1.6991157865303748e-06, "loss": 0.6301, "step": 5861 }, { "epoch": 0.85, "grad_norm": 5.954365253448486, "learning_rate": 1.6990026043871447e-06, "loss": 0.7003, "step": 5862 }, { "epoch": 0.85, "grad_norm": 5.027729511260986, "learning_rate": 1.69888940473154e-06, "loss": 0.7024, "step": 5863 }, { "epoch": 0.85, "grad_norm": 5.374891757965088, "learning_rate": 1.698776187566397e-06, "loss": 0.7099, "step": 5864 }, { "epoch": 0.85, "grad_norm": 5.841552257537842, "learning_rate": 1.698662952894552e-06, "loss": 0.7768, "step": 5865 }, { "epoch": 0.85, "grad_norm": 5.64689826965332, "learning_rate": 1.698549700718842e-06, "loss": 0.7357, "step": 5866 }, { "epoch": 0.85, "grad_norm": 5.490314483642578, "learning_rate": 1.6984364310421037e-06, "loss": 0.6585, "step": 5867 }, { "epoch": 0.85, "grad_norm": 5.874777793884277, "learning_rate": 1.6983231438671761e-06, "loss": 0.6751, "step": 5868 }, { "epoch": 0.85, "grad_norm": 5.616330623626709, "learning_rate": 1.6982098391968967e-06, "loss": 0.7273, "step": 5869 }, { "epoch": 0.85, "grad_norm": 6.041133880615234, "learning_rate": 1.698096517034104e-06, "loss": 0.6866, "step": 5870 }, { "epoch": 0.85, "grad_norm": 4.660490036010742, "learning_rate": 1.6979831773816374e-06, "loss": 0.6473, "step": 5871 }, { "epoch": 0.85, "grad_norm": 5.1296234130859375, "learning_rate": 1.6978698202423364e-06, "loss": 0.6106, "step": 5872 }, { "epoch": 0.85, "grad_norm": 5.980855464935303, "learning_rate": 1.6977564456190413e-06, "loss": 0.7687, "step": 5873 }, { "epoch": 0.85, "grad_norm": 5.503617286682129, "learning_rate": 1.6976430535145916e-06, "loss": 0.755, "step": 5874 }, { "epoch": 0.85, "grad_norm": 5.414279460906982, "learning_rate": 1.6975296439318291e-06, "loss": 0.6344, "step": 5875 }, { "epoch": 0.85, "grad_norm": 5.447123050689697, "learning_rate": 1.6974162168735948e-06, "loss": 0.6584, "step": 5876 }, { "epoch": 0.85, "grad_norm": 6.248067855834961, "learning_rate": 1.6973027723427302e-06, "loss": 0.7892, "step": 5877 }, { "epoch": 0.85, "grad_norm": 5.9902119636535645, "learning_rate": 1.6971893103420779e-06, "loss": 0.7224, "step": 5878 }, { "epoch": 0.85, "grad_norm": 6.116232872009277, "learning_rate": 1.69707583087448e-06, "loss": 0.6434, "step": 5879 }, { "epoch": 0.85, "grad_norm": 5.55814790725708, "learning_rate": 1.6969623339427797e-06, "loss": 0.6954, "step": 5880 }, { "epoch": 0.85, "grad_norm": 5.403077125549316, "learning_rate": 1.6968488195498207e-06, "loss": 0.7109, "step": 5881 }, { "epoch": 0.85, "grad_norm": 5.634218692779541, "learning_rate": 1.6967352876984466e-06, "loss": 0.7184, "step": 5882 }, { "epoch": 0.85, "grad_norm": 6.43429708480835, "learning_rate": 1.6966217383915023e-06, "loss": 0.6572, "step": 5883 }, { "epoch": 0.85, "grad_norm": 5.440638065338135, "learning_rate": 1.6965081716318318e-06, "loss": 0.6529, "step": 5884 }, { "epoch": 0.85, "grad_norm": 5.901697158813477, "learning_rate": 1.6963945874222812e-06, "loss": 0.7679, "step": 5885 }, { "epoch": 0.85, "grad_norm": 5.202841281890869, "learning_rate": 1.6962809857656957e-06, "loss": 0.7338, "step": 5886 }, { "epoch": 0.85, "grad_norm": 6.127538681030273, "learning_rate": 1.6961673666649214e-06, "loss": 0.7251, "step": 5887 }, { "epoch": 0.85, "grad_norm": 5.517414093017578, "learning_rate": 1.6960537301228046e-06, "loss": 0.6778, "step": 5888 }, { "epoch": 0.85, "grad_norm": 5.8208794593811035, "learning_rate": 1.6959400761421927e-06, "loss": 0.7277, "step": 5889 }, { "epoch": 0.85, "grad_norm": 5.23307466506958, "learning_rate": 1.6958264047259333e-06, "loss": 0.6899, "step": 5890 }, { "epoch": 0.85, "grad_norm": 5.4363579750061035, "learning_rate": 1.6957127158768737e-06, "loss": 0.7913, "step": 5891 }, { "epoch": 0.85, "grad_norm": 6.1073174476623535, "learning_rate": 1.6955990095978621e-06, "loss": 0.7005, "step": 5892 }, { "epoch": 0.86, "grad_norm": 6.17573356628418, "learning_rate": 1.695485285891748e-06, "loss": 0.6933, "step": 5893 }, { "epoch": 0.86, "grad_norm": 6.130719184875488, "learning_rate": 1.69537154476138e-06, "loss": 0.7554, "step": 5894 }, { "epoch": 0.86, "grad_norm": 5.276605606079102, "learning_rate": 1.6952577862096075e-06, "loss": 0.7437, "step": 5895 }, { "epoch": 0.86, "grad_norm": 5.437506198883057, "learning_rate": 1.6951440102392812e-06, "loss": 0.7696, "step": 5896 }, { "epoch": 0.86, "grad_norm": 5.836306095123291, "learning_rate": 1.695030216853251e-06, "loss": 0.6791, "step": 5897 }, { "epoch": 0.86, "grad_norm": 5.508167266845703, "learning_rate": 1.6949164060543682e-06, "loss": 0.7099, "step": 5898 }, { "epoch": 0.86, "grad_norm": 5.443430423736572, "learning_rate": 1.694802577845484e-06, "loss": 0.6461, "step": 5899 }, { "epoch": 0.86, "grad_norm": 5.167452335357666, "learning_rate": 1.6946887322294498e-06, "loss": 0.7215, "step": 5900 }, { "epoch": 0.86, "grad_norm": 5.316621780395508, "learning_rate": 1.6945748692091187e-06, "loss": 0.6637, "step": 5901 }, { "epoch": 0.86, "grad_norm": 6.150884628295898, "learning_rate": 1.6944609887873423e-06, "loss": 0.713, "step": 5902 }, { "epoch": 0.86, "grad_norm": 5.660771369934082, "learning_rate": 1.6943470909669745e-06, "loss": 0.6191, "step": 5903 }, { "epoch": 0.86, "grad_norm": 5.9466471672058105, "learning_rate": 1.6942331757508684e-06, "loss": 0.7125, "step": 5904 }, { "epoch": 0.86, "grad_norm": 6.112617015838623, "learning_rate": 1.694119243141878e-06, "loss": 0.6553, "step": 5905 }, { "epoch": 0.86, "grad_norm": 5.044610500335693, "learning_rate": 1.694005293142858e-06, "loss": 0.5862, "step": 5906 }, { "epoch": 0.86, "grad_norm": 5.594594478607178, "learning_rate": 1.693891325756663e-06, "loss": 0.717, "step": 5907 }, { "epoch": 0.86, "grad_norm": 5.515918254852295, "learning_rate": 1.693777340986148e-06, "loss": 0.6702, "step": 5908 }, { "epoch": 0.86, "grad_norm": 5.8910231590271, "learning_rate": 1.6936633388341692e-06, "loss": 0.7854, "step": 5909 }, { "epoch": 0.86, "grad_norm": 5.372871398925781, "learning_rate": 1.6935493193035826e-06, "loss": 0.7517, "step": 5910 }, { "epoch": 0.86, "grad_norm": 6.0665388107299805, "learning_rate": 1.6934352823972447e-06, "loss": 0.7644, "step": 5911 }, { "epoch": 0.86, "grad_norm": 5.070135116577148, "learning_rate": 1.6933212281180125e-06, "loss": 0.6957, "step": 5912 }, { "epoch": 0.86, "grad_norm": 5.527318000793457, "learning_rate": 1.6932071564687434e-06, "loss": 0.7077, "step": 5913 }, { "epoch": 0.86, "grad_norm": 6.228707790374756, "learning_rate": 1.6930930674522954e-06, "loss": 0.7501, "step": 5914 }, { "epoch": 0.86, "grad_norm": 6.178271770477295, "learning_rate": 1.6929789610715268e-06, "loss": 0.7476, "step": 5915 }, { "epoch": 0.86, "grad_norm": 4.738365173339844, "learning_rate": 1.6928648373292963e-06, "loss": 0.6346, "step": 5916 }, { "epoch": 0.86, "grad_norm": 5.863784313201904, "learning_rate": 1.692750696228463e-06, "loss": 0.7314, "step": 5917 }, { "epoch": 0.86, "grad_norm": 5.639469623565674, "learning_rate": 1.6926365377718863e-06, "loss": 0.6998, "step": 5918 }, { "epoch": 0.86, "grad_norm": 6.008709907531738, "learning_rate": 1.692522361962427e-06, "loss": 0.6381, "step": 5919 }, { "epoch": 0.86, "grad_norm": 5.12064266204834, "learning_rate": 1.692408168802945e-06, "loss": 0.6336, "step": 5920 }, { "epoch": 0.86, "grad_norm": 6.898004055023193, "learning_rate": 1.6922939582963016e-06, "loss": 0.7665, "step": 5921 }, { "epoch": 0.86, "grad_norm": 6.282734394073486, "learning_rate": 1.6921797304453579e-06, "loss": 0.786, "step": 5922 }, { "epoch": 0.86, "grad_norm": 5.861415386199951, "learning_rate": 1.6920654852529752e-06, "loss": 0.704, "step": 5923 }, { "epoch": 0.86, "grad_norm": 5.0919599533081055, "learning_rate": 1.6919512227220168e-06, "loss": 0.667, "step": 5924 }, { "epoch": 0.86, "grad_norm": 4.985459804534912, "learning_rate": 1.6918369428553447e-06, "loss": 0.6535, "step": 5925 }, { "epoch": 0.86, "grad_norm": 5.283481597900391, "learning_rate": 1.691722645655822e-06, "loss": 0.661, "step": 5926 }, { "epoch": 0.86, "grad_norm": 5.2308735847473145, "learning_rate": 1.6916083311263124e-06, "loss": 0.684, "step": 5927 }, { "epoch": 0.86, "grad_norm": 5.919754505157471, "learning_rate": 1.6914939992696798e-06, "loss": 0.8378, "step": 5928 }, { "epoch": 0.86, "grad_norm": 5.56190824508667, "learning_rate": 1.6913796500887884e-06, "loss": 0.6764, "step": 5929 }, { "epoch": 0.86, "grad_norm": 5.720040321350098, "learning_rate": 1.6912652835865036e-06, "loss": 0.6815, "step": 5930 }, { "epoch": 0.86, "grad_norm": 5.518938064575195, "learning_rate": 1.6911508997656903e-06, "loss": 0.6127, "step": 5931 }, { "epoch": 0.86, "grad_norm": 5.697559356689453, "learning_rate": 1.6910364986292138e-06, "loss": 0.6915, "step": 5932 }, { "epoch": 0.86, "grad_norm": 5.725381851196289, "learning_rate": 1.6909220801799412e-06, "loss": 0.7237, "step": 5933 }, { "epoch": 0.86, "grad_norm": 5.360852241516113, "learning_rate": 1.690807644420738e-06, "loss": 0.7343, "step": 5934 }, { "epoch": 0.86, "grad_norm": 6.722293853759766, "learning_rate": 1.6906931913544718e-06, "loss": 0.8247, "step": 5935 }, { "epoch": 0.86, "grad_norm": 5.288537502288818, "learning_rate": 1.69057872098401e-06, "loss": 0.6752, "step": 5936 }, { "epoch": 0.86, "grad_norm": 5.157972812652588, "learning_rate": 1.6904642333122205e-06, "loss": 0.7524, "step": 5937 }, { "epoch": 0.86, "grad_norm": 5.660336017608643, "learning_rate": 1.6903497283419719e-06, "loss": 0.6931, "step": 5938 }, { "epoch": 0.86, "grad_norm": 5.799731254577637, "learning_rate": 1.6902352060761321e-06, "loss": 0.6761, "step": 5939 }, { "epoch": 0.86, "grad_norm": 5.199906826019287, "learning_rate": 1.6901206665175707e-06, "loss": 0.6345, "step": 5940 }, { "epoch": 0.86, "grad_norm": 5.119263648986816, "learning_rate": 1.6900061096691577e-06, "loss": 0.66, "step": 5941 }, { "epoch": 0.86, "grad_norm": 5.923231601715088, "learning_rate": 1.6898915355337625e-06, "loss": 0.7328, "step": 5942 }, { "epoch": 0.86, "grad_norm": 5.462947845458984, "learning_rate": 1.6897769441142558e-06, "loss": 0.7077, "step": 5943 }, { "epoch": 0.86, "grad_norm": 5.388694763183594, "learning_rate": 1.6896623354135083e-06, "loss": 0.7822, "step": 5944 }, { "epoch": 0.86, "grad_norm": 5.26947021484375, "learning_rate": 1.689547709434392e-06, "loss": 0.7085, "step": 5945 }, { "epoch": 0.86, "grad_norm": 5.728264808654785, "learning_rate": 1.689433066179778e-06, "loss": 0.8122, "step": 5946 }, { "epoch": 0.86, "grad_norm": 5.7628374099731445, "learning_rate": 1.6893184056525388e-06, "loss": 0.7911, "step": 5947 }, { "epoch": 0.86, "grad_norm": 5.2867751121521, "learning_rate": 1.6892037278555468e-06, "loss": 0.6633, "step": 5948 }, { "epoch": 0.86, "grad_norm": 4.720650672912598, "learning_rate": 1.6890890327916756e-06, "loss": 0.7134, "step": 5949 }, { "epoch": 0.86, "grad_norm": 5.8536272048950195, "learning_rate": 1.688974320463798e-06, "loss": 0.728, "step": 5950 }, { "epoch": 0.86, "grad_norm": 5.451415061950684, "learning_rate": 1.6888595908747887e-06, "loss": 0.6895, "step": 5951 }, { "epoch": 0.86, "grad_norm": 5.052707195281982, "learning_rate": 1.6887448440275214e-06, "loss": 0.6277, "step": 5952 }, { "epoch": 0.86, "grad_norm": 4.771960258483887, "learning_rate": 1.688630079924871e-06, "loss": 0.7378, "step": 5953 }, { "epoch": 0.86, "grad_norm": 5.875432014465332, "learning_rate": 1.6885152985697131e-06, "loss": 0.7536, "step": 5954 }, { "epoch": 0.86, "grad_norm": 6.161439418792725, "learning_rate": 1.6884004999649228e-06, "loss": 0.7646, "step": 5955 }, { "epoch": 0.86, "grad_norm": 6.033910274505615, "learning_rate": 1.6882856841133766e-06, "loss": 0.7405, "step": 5956 }, { "epoch": 0.86, "grad_norm": 5.478484630584717, "learning_rate": 1.6881708510179512e-06, "loss": 0.7086, "step": 5957 }, { "epoch": 0.86, "grad_norm": 5.267884254455566, "learning_rate": 1.6880560006815227e-06, "loss": 0.6578, "step": 5958 }, { "epoch": 0.86, "grad_norm": 5.773019313812256, "learning_rate": 1.6879411331069697e-06, "loss": 0.7541, "step": 5959 }, { "epoch": 0.86, "grad_norm": 5.7088623046875, "learning_rate": 1.6878262482971691e-06, "loss": 0.6894, "step": 5960 }, { "epoch": 0.86, "grad_norm": 5.273496150970459, "learning_rate": 1.6877113462549995e-06, "loss": 0.6425, "step": 5961 }, { "epoch": 0.87, "grad_norm": 6.497229099273682, "learning_rate": 1.6875964269833393e-06, "loss": 0.6564, "step": 5962 }, { "epoch": 0.87, "grad_norm": 5.405148506164551, "learning_rate": 1.687481490485068e-06, "loss": 0.6421, "step": 5963 }, { "epoch": 0.87, "grad_norm": 6.212515830993652, "learning_rate": 1.6873665367630654e-06, "loss": 0.8027, "step": 5964 }, { "epoch": 0.87, "grad_norm": 5.663358211517334, "learning_rate": 1.6872515658202105e-06, "loss": 0.7302, "step": 5965 }, { "epoch": 0.87, "grad_norm": 5.079867839813232, "learning_rate": 1.6871365776593846e-06, "loss": 0.6919, "step": 5966 }, { "epoch": 0.87, "grad_norm": 4.862299919128418, "learning_rate": 1.6870215722834683e-06, "loss": 0.686, "step": 5967 }, { "epoch": 0.87, "grad_norm": 5.18772029876709, "learning_rate": 1.6869065496953425e-06, "loss": 0.6424, "step": 5968 }, { "epoch": 0.87, "grad_norm": 5.304441452026367, "learning_rate": 1.6867915098978895e-06, "loss": 0.643, "step": 5969 }, { "epoch": 0.87, "grad_norm": 5.480730056762695, "learning_rate": 1.686676452893991e-06, "loss": 0.6844, "step": 5970 }, { "epoch": 0.87, "grad_norm": 5.665870666503906, "learning_rate": 1.68656137868653e-06, "loss": 0.7201, "step": 5971 }, { "epoch": 0.87, "grad_norm": 5.420271873474121, "learning_rate": 1.6864462872783888e-06, "loss": 0.7115, "step": 5972 }, { "epoch": 0.87, "grad_norm": 5.3515400886535645, "learning_rate": 1.686331178672451e-06, "loss": 0.6357, "step": 5973 }, { "epoch": 0.87, "grad_norm": 5.516329288482666, "learning_rate": 1.6862160528716012e-06, "loss": 0.7128, "step": 5974 }, { "epoch": 0.87, "grad_norm": 5.738288402557373, "learning_rate": 1.686100909878723e-06, "loss": 0.6495, "step": 5975 }, { "epoch": 0.87, "grad_norm": 5.792908668518066, "learning_rate": 1.6859857496967012e-06, "loss": 0.7458, "step": 5976 }, { "epoch": 0.87, "grad_norm": 5.656815052032471, "learning_rate": 1.6858705723284211e-06, "loss": 0.7248, "step": 5977 }, { "epoch": 0.87, "grad_norm": 5.504546165466309, "learning_rate": 1.6857553777767684e-06, "loss": 0.735, "step": 5978 }, { "epoch": 0.87, "grad_norm": 5.964632511138916, "learning_rate": 1.685640166044629e-06, "loss": 0.7413, "step": 5979 }, { "epoch": 0.87, "grad_norm": 5.715765476226807, "learning_rate": 1.6855249371348889e-06, "loss": 0.6672, "step": 5980 }, { "epoch": 0.87, "grad_norm": 5.9075164794921875, "learning_rate": 1.6854096910504358e-06, "loss": 0.775, "step": 5981 }, { "epoch": 0.87, "grad_norm": 5.392979621887207, "learning_rate": 1.6852944277941563e-06, "loss": 0.6879, "step": 5982 }, { "epoch": 0.87, "grad_norm": 5.416827201843262, "learning_rate": 1.6851791473689384e-06, "loss": 0.6506, "step": 5983 }, { "epoch": 0.87, "grad_norm": 5.801290035247803, "learning_rate": 1.6850638497776703e-06, "loss": 0.6997, "step": 5984 }, { "epoch": 0.87, "grad_norm": 5.561807632446289, "learning_rate": 1.6849485350232406e-06, "loss": 0.7007, "step": 5985 }, { "epoch": 0.87, "grad_norm": 5.0591325759887695, "learning_rate": 1.6848332031085382e-06, "loss": 0.6862, "step": 5986 }, { "epoch": 0.87, "grad_norm": 5.789338111877441, "learning_rate": 1.6847178540364526e-06, "loss": 0.7348, "step": 5987 }, { "epoch": 0.87, "grad_norm": 5.265780448913574, "learning_rate": 1.6846024878098738e-06, "loss": 0.7241, "step": 5988 }, { "epoch": 0.87, "grad_norm": 5.064777374267578, "learning_rate": 1.684487104431692e-06, "loss": 0.7315, "step": 5989 }, { "epoch": 0.87, "grad_norm": 5.300419807434082, "learning_rate": 1.684371703904798e-06, "loss": 0.6825, "step": 5990 }, { "epoch": 0.87, "grad_norm": 5.595425128936768, "learning_rate": 1.6842562862320827e-06, "loss": 0.6649, "step": 5991 }, { "epoch": 0.87, "grad_norm": 5.482059001922607, "learning_rate": 1.6841408514164384e-06, "loss": 0.66, "step": 5992 }, { "epoch": 0.87, "grad_norm": 5.397197723388672, "learning_rate": 1.6840253994607565e-06, "loss": 0.713, "step": 5993 }, { "epoch": 0.87, "grad_norm": 6.039696216583252, "learning_rate": 1.6839099303679296e-06, "loss": 0.7268, "step": 5994 }, { "epoch": 0.87, "grad_norm": 5.0638108253479, "learning_rate": 1.6837944441408504e-06, "loss": 0.7183, "step": 5995 }, { "epoch": 0.87, "grad_norm": 5.364447116851807, "learning_rate": 1.6836789407824127e-06, "loss": 0.7167, "step": 5996 }, { "epoch": 0.87, "grad_norm": 5.742953300476074, "learning_rate": 1.6835634202955102e-06, "loss": 0.6704, "step": 5997 }, { "epoch": 0.87, "grad_norm": 5.2462663650512695, "learning_rate": 1.6834478826830367e-06, "loss": 0.7058, "step": 5998 }, { "epoch": 0.87, "grad_norm": 5.905352592468262, "learning_rate": 1.6833323279478871e-06, "loss": 0.7546, "step": 5999 }, { "epoch": 0.87, "grad_norm": 6.179976940155029, "learning_rate": 1.683216756092956e-06, "loss": 0.7679, "step": 6000 }, { "epoch": 0.87, "grad_norm": 5.690495491027832, "learning_rate": 1.6831011671211394e-06, "loss": 0.6346, "step": 6001 }, { "epoch": 0.87, "grad_norm": 5.922985553741455, "learning_rate": 1.682985561035333e-06, "loss": 0.7004, "step": 6002 }, { "epoch": 0.87, "grad_norm": 5.579043388366699, "learning_rate": 1.682869937838433e-06, "loss": 0.7325, "step": 6003 }, { "epoch": 0.87, "grad_norm": 5.4685444831848145, "learning_rate": 1.6827542975333361e-06, "loss": 0.7492, "step": 6004 }, { "epoch": 0.87, "grad_norm": 5.299015045166016, "learning_rate": 1.6826386401229402e-06, "loss": 0.6855, "step": 6005 }, { "epoch": 0.87, "grad_norm": 6.203155040740967, "learning_rate": 1.682522965610142e-06, "loss": 0.6691, "step": 6006 }, { "epoch": 0.87, "grad_norm": 5.877907752990723, "learning_rate": 1.6824072739978397e-06, "loss": 0.6504, "step": 6007 }, { "epoch": 0.87, "grad_norm": 6.202373027801514, "learning_rate": 1.6822915652889322e-06, "loss": 0.7093, "step": 6008 }, { "epoch": 0.87, "grad_norm": 5.9907989501953125, "learning_rate": 1.682175839486318e-06, "loss": 0.6899, "step": 6009 }, { "epoch": 0.87, "grad_norm": 5.406871795654297, "learning_rate": 1.6820600965928966e-06, "loss": 0.6927, "step": 6010 }, { "epoch": 0.87, "grad_norm": 5.707271099090576, "learning_rate": 1.6819443366115676e-06, "loss": 0.6544, "step": 6011 }, { "epoch": 0.87, "grad_norm": 4.828095436096191, "learning_rate": 1.6818285595452314e-06, "loss": 0.6641, "step": 6012 }, { "epoch": 0.87, "grad_norm": 6.236393928527832, "learning_rate": 1.6817127653967883e-06, "loss": 0.7248, "step": 6013 }, { "epoch": 0.87, "grad_norm": 5.827785968780518, "learning_rate": 1.6815969541691399e-06, "loss": 0.6423, "step": 6014 }, { "epoch": 0.87, "grad_norm": 5.326496124267578, "learning_rate": 1.6814811258651871e-06, "loss": 0.6581, "step": 6015 }, { "epoch": 0.87, "grad_norm": 6.786286354064941, "learning_rate": 1.681365280487832e-06, "loss": 0.737, "step": 6016 }, { "epoch": 0.87, "grad_norm": 5.151379108428955, "learning_rate": 1.681249418039977e-06, "loss": 0.7647, "step": 6017 }, { "epoch": 0.87, "grad_norm": 5.00508451461792, "learning_rate": 1.6811335385245245e-06, "loss": 0.6653, "step": 6018 }, { "epoch": 0.87, "grad_norm": 6.00458288192749, "learning_rate": 1.6810176419443782e-06, "loss": 0.7402, "step": 6019 }, { "epoch": 0.87, "grad_norm": 5.454413414001465, "learning_rate": 1.6809017283024412e-06, "loss": 0.6569, "step": 6020 }, { "epoch": 0.87, "grad_norm": 5.946305274963379, "learning_rate": 1.6807857976016178e-06, "loss": 0.6727, "step": 6021 }, { "epoch": 0.87, "grad_norm": 5.866055965423584, "learning_rate": 1.6806698498448127e-06, "loss": 0.7563, "step": 6022 }, { "epoch": 0.87, "grad_norm": 6.132169246673584, "learning_rate": 1.68055388503493e-06, "loss": 0.74, "step": 6023 }, { "epoch": 0.87, "grad_norm": 5.779871463775635, "learning_rate": 1.6804379031748757e-06, "loss": 0.6744, "step": 6024 }, { "epoch": 0.87, "grad_norm": 6.131735801696777, "learning_rate": 1.6803219042675553e-06, "loss": 0.7236, "step": 6025 }, { "epoch": 0.87, "grad_norm": 5.19408655166626, "learning_rate": 1.6802058883158749e-06, "loss": 0.6708, "step": 6026 }, { "epoch": 0.87, "grad_norm": 5.800826549530029, "learning_rate": 1.6800898553227415e-06, "loss": 0.8145, "step": 6027 }, { "epoch": 0.87, "grad_norm": 5.7942328453063965, "learning_rate": 1.6799738052910617e-06, "loss": 0.6487, "step": 6028 }, { "epoch": 0.87, "grad_norm": 4.726451873779297, "learning_rate": 1.6798577382237431e-06, "loss": 0.6348, "step": 6029 }, { "epoch": 0.87, "grad_norm": 5.571574687957764, "learning_rate": 1.6797416541236934e-06, "loss": 0.7186, "step": 6030 }, { "epoch": 0.88, "grad_norm": 5.5081377029418945, "learning_rate": 1.6796255529938207e-06, "loss": 0.7102, "step": 6031 }, { "epoch": 0.88, "grad_norm": 5.348936557769775, "learning_rate": 1.6795094348370347e-06, "loss": 0.7266, "step": 6032 }, { "epoch": 0.88, "grad_norm": 6.0600762367248535, "learning_rate": 1.6793932996562433e-06, "loss": 0.7384, "step": 6033 }, { "epoch": 0.88, "grad_norm": 5.372790336608887, "learning_rate": 1.679277147454357e-06, "loss": 0.7543, "step": 6034 }, { "epoch": 0.88, "grad_norm": 5.973076343536377, "learning_rate": 1.6791609782342856e-06, "loss": 0.7733, "step": 6035 }, { "epoch": 0.88, "grad_norm": 5.6950907707214355, "learning_rate": 1.679044791998939e-06, "loss": 0.6206, "step": 6036 }, { "epoch": 0.88, "grad_norm": 5.4739460945129395, "learning_rate": 1.6789285887512287e-06, "loss": 0.7796, "step": 6037 }, { "epoch": 0.88, "grad_norm": 5.413051128387451, "learning_rate": 1.6788123684940657e-06, "loss": 0.6965, "step": 6038 }, { "epoch": 0.88, "grad_norm": 6.257065773010254, "learning_rate": 1.6786961312303618e-06, "loss": 0.7361, "step": 6039 }, { "epoch": 0.88, "grad_norm": 6.634852886199951, "learning_rate": 1.6785798769630292e-06, "loss": 0.7874, "step": 6040 }, { "epoch": 0.88, "grad_norm": 5.706172466278076, "learning_rate": 1.67846360569498e-06, "loss": 0.7044, "step": 6041 }, { "epoch": 0.88, "grad_norm": 5.13106632232666, "learning_rate": 1.678347317429128e-06, "loss": 0.6665, "step": 6042 }, { "epoch": 0.88, "grad_norm": 6.007413864135742, "learning_rate": 1.678231012168386e-06, "loss": 0.7321, "step": 6043 }, { "epoch": 0.88, "grad_norm": 5.040871620178223, "learning_rate": 1.678114689915668e-06, "loss": 0.6744, "step": 6044 }, { "epoch": 0.88, "grad_norm": 5.50083589553833, "learning_rate": 1.6779983506738882e-06, "loss": 0.7308, "step": 6045 }, { "epoch": 0.88, "grad_norm": 5.658187389373779, "learning_rate": 1.6778819944459615e-06, "loss": 0.7501, "step": 6046 }, { "epoch": 0.88, "grad_norm": 6.111437797546387, "learning_rate": 1.6777656212348026e-06, "loss": 0.7349, "step": 6047 }, { "epoch": 0.88, "grad_norm": 5.116199493408203, "learning_rate": 1.6776492310433276e-06, "loss": 0.6979, "step": 6048 }, { "epoch": 0.88, "grad_norm": 5.832332134246826, "learning_rate": 1.677532823874452e-06, "loss": 0.7539, "step": 6049 }, { "epoch": 0.88, "grad_norm": 6.249711036682129, "learning_rate": 1.6774163997310927e-06, "loss": 0.8153, "step": 6050 }, { "epoch": 0.88, "grad_norm": 5.239418029785156, "learning_rate": 1.6772999586161655e-06, "loss": 0.6751, "step": 6051 }, { "epoch": 0.88, "grad_norm": 4.968181610107422, "learning_rate": 1.677183500532589e-06, "loss": 0.6736, "step": 6052 }, { "epoch": 0.88, "grad_norm": 4.961709499359131, "learning_rate": 1.6770670254832799e-06, "loss": 0.6327, "step": 6053 }, { "epoch": 0.88, "grad_norm": 5.529365539550781, "learning_rate": 1.6769505334711566e-06, "loss": 0.7481, "step": 6054 }, { "epoch": 0.88, "grad_norm": 5.217462062835693, "learning_rate": 1.6768340244991376e-06, "loss": 0.63, "step": 6055 }, { "epoch": 0.88, "grad_norm": 6.496467590332031, "learning_rate": 1.676717498570142e-06, "loss": 0.6681, "step": 6056 }, { "epoch": 0.88, "grad_norm": 5.969038486480713, "learning_rate": 1.6766009556870891e-06, "loss": 0.7395, "step": 6057 }, { "epoch": 0.88, "grad_norm": 6.491067886352539, "learning_rate": 1.6764843958528982e-06, "loss": 0.7588, "step": 6058 }, { "epoch": 0.88, "grad_norm": 5.787112236022949, "learning_rate": 1.6763678190704905e-06, "loss": 0.7434, "step": 6059 }, { "epoch": 0.88, "grad_norm": 6.237318515777588, "learning_rate": 1.6762512253427857e-06, "loss": 0.7415, "step": 6060 }, { "epoch": 0.88, "grad_norm": 5.678683757781982, "learning_rate": 1.6761346146727054e-06, "loss": 0.7452, "step": 6061 }, { "epoch": 0.88, "grad_norm": 6.376708030700684, "learning_rate": 1.6760179870631705e-06, "loss": 0.7442, "step": 6062 }, { "epoch": 0.88, "grad_norm": 6.185734748840332, "learning_rate": 1.6759013425171038e-06, "loss": 0.6502, "step": 6063 }, { "epoch": 0.88, "grad_norm": 6.016416549682617, "learning_rate": 1.6757846810374271e-06, "loss": 0.6787, "step": 6064 }, { "epoch": 0.88, "grad_norm": 5.633566379547119, "learning_rate": 1.675668002627063e-06, "loss": 0.7303, "step": 6065 }, { "epoch": 0.88, "grad_norm": 5.828696250915527, "learning_rate": 1.6755513072889354e-06, "loss": 0.6387, "step": 6066 }, { "epoch": 0.88, "grad_norm": 5.719795227050781, "learning_rate": 1.6754345950259672e-06, "loss": 0.6604, "step": 6067 }, { "epoch": 0.88, "grad_norm": 5.003561496734619, "learning_rate": 1.6753178658410827e-06, "loss": 0.6463, "step": 6068 }, { "epoch": 0.88, "grad_norm": 5.862034320831299, "learning_rate": 1.6752011197372062e-06, "loss": 0.7771, "step": 6069 }, { "epoch": 0.88, "grad_norm": 6.053001403808594, "learning_rate": 1.675084356717263e-06, "loss": 0.7564, "step": 6070 }, { "epoch": 0.88, "grad_norm": 5.248356342315674, "learning_rate": 1.674967576784178e-06, "loss": 0.653, "step": 6071 }, { "epoch": 0.88, "grad_norm": 5.5040202140808105, "learning_rate": 1.6748507799408771e-06, "loss": 0.7525, "step": 6072 }, { "epoch": 0.88, "grad_norm": 5.16509485244751, "learning_rate": 1.6747339661902866e-06, "loss": 0.6757, "step": 6073 }, { "epoch": 0.88, "grad_norm": 5.8516669273376465, "learning_rate": 1.6746171355353327e-06, "loss": 0.7266, "step": 6074 }, { "epoch": 0.88, "grad_norm": 4.9035563468933105, "learning_rate": 1.6745002879789426e-06, "loss": 0.7329, "step": 6075 }, { "epoch": 0.88, "grad_norm": 5.8832688331604, "learning_rate": 1.6743834235240437e-06, "loss": 0.822, "step": 6076 }, { "epoch": 0.88, "grad_norm": 4.662444114685059, "learning_rate": 1.674266542173564e-06, "loss": 0.6138, "step": 6077 }, { "epoch": 0.88, "grad_norm": 5.5338358879089355, "learning_rate": 1.6741496439304314e-06, "loss": 0.6467, "step": 6078 }, { "epoch": 0.88, "grad_norm": 5.81838846206665, "learning_rate": 1.674032728797575e-06, "loss": 0.6954, "step": 6079 }, { "epoch": 0.88, "grad_norm": 5.818342208862305, "learning_rate": 1.673915796777924e-06, "loss": 0.722, "step": 6080 }, { "epoch": 0.88, "grad_norm": 5.447833061218262, "learning_rate": 1.6737988478744077e-06, "loss": 0.6389, "step": 6081 }, { "epoch": 0.88, "grad_norm": 5.662792205810547, "learning_rate": 1.6736818820899558e-06, "loss": 0.7107, "step": 6082 }, { "epoch": 0.88, "grad_norm": 5.207115173339844, "learning_rate": 1.6735648994274988e-06, "loss": 0.709, "step": 6083 }, { "epoch": 0.88, "grad_norm": 5.700115203857422, "learning_rate": 1.6734478998899681e-06, "loss": 0.7067, "step": 6084 }, { "epoch": 0.88, "grad_norm": 5.452834606170654, "learning_rate": 1.6733308834802944e-06, "loss": 0.6475, "step": 6085 }, { "epoch": 0.88, "grad_norm": 5.666233062744141, "learning_rate": 1.6732138502014092e-06, "loss": 0.6805, "step": 6086 }, { "epoch": 0.88, "grad_norm": 5.505597114562988, "learning_rate": 1.6730968000562453e-06, "loss": 0.7479, "step": 6087 }, { "epoch": 0.88, "grad_norm": 5.143280506134033, "learning_rate": 1.6729797330477347e-06, "loss": 0.6689, "step": 6088 }, { "epoch": 0.88, "grad_norm": 5.51775598526001, "learning_rate": 1.67286264917881e-06, "loss": 0.7135, "step": 6089 }, { "epoch": 0.88, "grad_norm": 6.22239875793457, "learning_rate": 1.672745548452405e-06, "loss": 0.6359, "step": 6090 }, { "epoch": 0.88, "grad_norm": 6.408420562744141, "learning_rate": 1.6726284308714534e-06, "loss": 0.6675, "step": 6091 }, { "epoch": 0.88, "grad_norm": 5.426215171813965, "learning_rate": 1.6725112964388895e-06, "loss": 0.7521, "step": 6092 }, { "epoch": 0.88, "grad_norm": 5.327319145202637, "learning_rate": 1.6723941451576476e-06, "loss": 0.7807, "step": 6093 }, { "epoch": 0.88, "grad_norm": 5.729563236236572, "learning_rate": 1.672276977030663e-06, "loss": 0.7599, "step": 6094 }, { "epoch": 0.88, "grad_norm": 4.94111442565918, "learning_rate": 1.6721597920608713e-06, "loss": 0.7086, "step": 6095 }, { "epoch": 0.88, "grad_norm": 5.779621601104736, "learning_rate": 1.6720425902512077e-06, "loss": 0.6994, "step": 6096 }, { "epoch": 0.88, "grad_norm": 5.816324710845947, "learning_rate": 1.6719253716046092e-06, "loss": 0.6288, "step": 6097 }, { "epoch": 0.88, "grad_norm": 5.987802505493164, "learning_rate": 1.6718081361240123e-06, "loss": 0.6738, "step": 6098 }, { "epoch": 0.88, "grad_norm": 5.511940956115723, "learning_rate": 1.6716908838123542e-06, "loss": 0.7279, "step": 6099 }, { "epoch": 0.89, "grad_norm": 5.256914138793945, "learning_rate": 1.6715736146725723e-06, "loss": 0.6993, "step": 6100 }, { "epoch": 0.89, "grad_norm": 5.562422275543213, "learning_rate": 1.6714563287076047e-06, "loss": 0.7177, "step": 6101 }, { "epoch": 0.89, "grad_norm": 5.295334339141846, "learning_rate": 1.6713390259203896e-06, "loss": 0.8437, "step": 6102 }, { "epoch": 0.89, "grad_norm": 5.781185150146484, "learning_rate": 1.671221706313866e-06, "loss": 0.7771, "step": 6103 }, { "epoch": 0.89, "grad_norm": 5.100071430206299, "learning_rate": 1.6711043698909735e-06, "loss": 0.7068, "step": 6104 }, { "epoch": 0.89, "grad_norm": 5.234009742736816, "learning_rate": 1.6709870166546513e-06, "loss": 0.7194, "step": 6105 }, { "epoch": 0.89, "grad_norm": 5.0321946144104, "learning_rate": 1.6708696466078397e-06, "loss": 0.6579, "step": 6106 }, { "epoch": 0.89, "grad_norm": 4.985918045043945, "learning_rate": 1.6707522597534789e-06, "loss": 0.7466, "step": 6107 }, { "epoch": 0.89, "grad_norm": 5.721141815185547, "learning_rate": 1.6706348560945104e-06, "loss": 0.7178, "step": 6108 }, { "epoch": 0.89, "grad_norm": 5.198652267456055, "learning_rate": 1.6705174356338752e-06, "loss": 0.6969, "step": 6109 }, { "epoch": 0.89, "grad_norm": 5.19527006149292, "learning_rate": 1.670399998374515e-06, "loss": 0.7405, "step": 6110 }, { "epoch": 0.89, "grad_norm": 6.121150016784668, "learning_rate": 1.670282544319372e-06, "loss": 0.7369, "step": 6111 }, { "epoch": 0.89, "grad_norm": 5.2192559242248535, "learning_rate": 1.6701650734713892e-06, "loss": 0.6553, "step": 6112 }, { "epoch": 0.89, "grad_norm": 5.143217086791992, "learning_rate": 1.6700475858335095e-06, "loss": 0.6704, "step": 6113 }, { "epoch": 0.89, "grad_norm": 5.080495834350586, "learning_rate": 1.6699300814086759e-06, "loss": 0.6144, "step": 6114 }, { "epoch": 0.89, "grad_norm": 5.940269470214844, "learning_rate": 1.6698125601998328e-06, "loss": 0.6308, "step": 6115 }, { "epoch": 0.89, "grad_norm": 6.040947914123535, "learning_rate": 1.6696950222099244e-06, "loss": 0.7721, "step": 6116 }, { "epoch": 0.89, "grad_norm": 5.808293342590332, "learning_rate": 1.6695774674418952e-06, "loss": 0.7433, "step": 6117 }, { "epoch": 0.89, "grad_norm": 5.243400573730469, "learning_rate": 1.6694598958986905e-06, "loss": 0.6201, "step": 6118 }, { "epoch": 0.89, "grad_norm": 5.913314342498779, "learning_rate": 1.669342307583256e-06, "loss": 0.7137, "step": 6119 }, { "epoch": 0.89, "grad_norm": 5.737931251525879, "learning_rate": 1.6692247024985374e-06, "loss": 0.8462, "step": 6120 }, { "epoch": 0.89, "grad_norm": 5.5336809158325195, "learning_rate": 1.6691070806474814e-06, "loss": 0.7156, "step": 6121 }, { "epoch": 0.89, "grad_norm": 6.0666279792785645, "learning_rate": 1.6689894420330346e-06, "loss": 0.7283, "step": 6122 }, { "epoch": 0.89, "grad_norm": 5.513828754425049, "learning_rate": 1.6688717866581443e-06, "loss": 0.6832, "step": 6123 }, { "epoch": 0.89, "grad_norm": 5.22543478012085, "learning_rate": 1.6687541145257582e-06, "loss": 0.7187, "step": 6124 }, { "epoch": 0.89, "grad_norm": 6.041311740875244, "learning_rate": 1.6686364256388243e-06, "loss": 0.7392, "step": 6125 }, { "epoch": 0.89, "grad_norm": 5.380559921264648, "learning_rate": 1.6685187200002915e-06, "loss": 0.743, "step": 6126 }, { "epoch": 0.89, "grad_norm": 5.563342094421387, "learning_rate": 1.6684009976131082e-06, "loss": 0.6883, "step": 6127 }, { "epoch": 0.89, "grad_norm": 5.518142223358154, "learning_rate": 1.668283258480224e-06, "loss": 0.7281, "step": 6128 }, { "epoch": 0.89, "grad_norm": 5.768915176391602, "learning_rate": 1.6681655026045884e-06, "loss": 0.6655, "step": 6129 }, { "epoch": 0.89, "grad_norm": 5.112680435180664, "learning_rate": 1.668047729989152e-06, "loss": 0.6428, "step": 6130 }, { "epoch": 0.89, "grad_norm": 6.024447441101074, "learning_rate": 1.6679299406368651e-06, "loss": 0.7063, "step": 6131 }, { "epoch": 0.89, "grad_norm": 5.428074359893799, "learning_rate": 1.6678121345506787e-06, "loss": 0.7111, "step": 6132 }, { "epoch": 0.89, "grad_norm": 5.278076648712158, "learning_rate": 1.6676943117335447e-06, "loss": 0.6944, "step": 6133 }, { "epoch": 0.89, "grad_norm": 5.182358741760254, "learning_rate": 1.6675764721884141e-06, "loss": 0.6923, "step": 6134 }, { "epoch": 0.89, "grad_norm": 4.9664740562438965, "learning_rate": 1.6674586159182403e-06, "loss": 0.6843, "step": 6135 }, { "epoch": 0.89, "grad_norm": 5.866415023803711, "learning_rate": 1.6673407429259746e-06, "loss": 0.7599, "step": 6136 }, { "epoch": 0.89, "grad_norm": 5.424510955810547, "learning_rate": 1.6672228532145715e-06, "loss": 0.6568, "step": 6137 }, { "epoch": 0.89, "grad_norm": 6.645849227905273, "learning_rate": 1.667104946786984e-06, "loss": 0.7657, "step": 6138 }, { "epoch": 0.89, "grad_norm": 5.722145080566406, "learning_rate": 1.6669870236461656e-06, "loss": 0.7446, "step": 6139 }, { "epoch": 0.89, "grad_norm": 5.854652404785156, "learning_rate": 1.6668690837950713e-06, "loss": 0.7788, "step": 6140 }, { "epoch": 0.89, "grad_norm": 5.633068084716797, "learning_rate": 1.6667511272366554e-06, "loss": 0.7214, "step": 6141 }, { "epoch": 0.89, "grad_norm": 5.084627151489258, "learning_rate": 1.6666331539738735e-06, "loss": 0.6503, "step": 6142 }, { "epoch": 0.89, "grad_norm": 5.230985164642334, "learning_rate": 1.6665151640096814e-06, "loss": 0.6575, "step": 6143 }, { "epoch": 0.89, "grad_norm": 6.402996063232422, "learning_rate": 1.6663971573470343e-06, "loss": 0.6911, "step": 6144 }, { "epoch": 0.89, "grad_norm": 6.002127647399902, "learning_rate": 1.6662791339888896e-06, "loss": 0.6737, "step": 6145 }, { "epoch": 0.89, "grad_norm": 5.448378562927246, "learning_rate": 1.6661610939382038e-06, "loss": 0.5926, "step": 6146 }, { "epoch": 0.89, "grad_norm": 5.380817413330078, "learning_rate": 1.6660430371979342e-06, "loss": 0.7597, "step": 6147 }, { "epoch": 0.89, "grad_norm": 5.453980922698975, "learning_rate": 1.6659249637710382e-06, "loss": 0.6028, "step": 6148 }, { "epoch": 0.89, "grad_norm": 5.688469409942627, "learning_rate": 1.6658068736604744e-06, "loss": 0.6929, "step": 6149 }, { "epoch": 0.89, "grad_norm": 5.999241352081299, "learning_rate": 1.6656887668692012e-06, "loss": 0.7309, "step": 6150 }, { "epoch": 0.89, "grad_norm": 5.872973442077637, "learning_rate": 1.6655706434001777e-06, "loss": 0.7462, "step": 6151 }, { "epoch": 0.89, "grad_norm": 5.556007385253906, "learning_rate": 1.665452503256363e-06, "loss": 0.6848, "step": 6152 }, { "epoch": 0.89, "grad_norm": 5.943386077880859, "learning_rate": 1.6653343464407172e-06, "loss": 0.731, "step": 6153 }, { "epoch": 0.89, "grad_norm": 5.394578456878662, "learning_rate": 1.6652161729562004e-06, "loss": 0.6812, "step": 6154 }, { "epoch": 0.89, "grad_norm": 5.468529224395752, "learning_rate": 1.665097982805773e-06, "loss": 0.6778, "step": 6155 }, { "epoch": 0.89, "grad_norm": 5.918163299560547, "learning_rate": 1.6649797759923967e-06, "loss": 0.715, "step": 6156 }, { "epoch": 0.89, "grad_norm": 5.5537190437316895, "learning_rate": 1.6648615525190324e-06, "loss": 0.6885, "step": 6157 }, { "epoch": 0.89, "grad_norm": 6.13470983505249, "learning_rate": 1.6647433123886421e-06, "loss": 0.7262, "step": 6158 }, { "epoch": 0.89, "grad_norm": 5.373293399810791, "learning_rate": 1.6646250556041882e-06, "loss": 0.687, "step": 6159 }, { "epoch": 0.89, "grad_norm": 5.567485332489014, "learning_rate": 1.6645067821686336e-06, "loss": 0.6786, "step": 6160 }, { "epoch": 0.89, "grad_norm": 5.775452136993408, "learning_rate": 1.6643884920849414e-06, "loss": 0.7235, "step": 6161 }, { "epoch": 0.89, "grad_norm": 5.317875862121582, "learning_rate": 1.6642701853560747e-06, "loss": 0.7685, "step": 6162 }, { "epoch": 0.89, "grad_norm": 5.371649742126465, "learning_rate": 1.664151861984998e-06, "loss": 0.7309, "step": 6163 }, { "epoch": 0.89, "grad_norm": 5.531702518463135, "learning_rate": 1.6640335219746755e-06, "loss": 0.652, "step": 6164 }, { "epoch": 0.89, "grad_norm": 5.407078266143799, "learning_rate": 1.6639151653280723e-06, "loss": 0.7045, "step": 6165 }, { "epoch": 0.89, "grad_norm": 5.346362113952637, "learning_rate": 1.663796792048153e-06, "loss": 0.6228, "step": 6166 }, { "epoch": 0.89, "grad_norm": 5.181941986083984, "learning_rate": 1.6636784021378839e-06, "loss": 0.7301, "step": 6167 }, { "epoch": 0.89, "grad_norm": 5.642922401428223, "learning_rate": 1.6635599956002307e-06, "loss": 0.6432, "step": 6168 }, { "epoch": 0.9, "grad_norm": 5.5085530281066895, "learning_rate": 1.6634415724381596e-06, "loss": 0.689, "step": 6169 }, { "epoch": 0.9, "grad_norm": 5.9281511306762695, "learning_rate": 1.6633231326546384e-06, "loss": 0.7436, "step": 6170 }, { "epoch": 0.9, "grad_norm": 5.124405860900879, "learning_rate": 1.6632046762526338e-06, "loss": 0.6515, "step": 6171 }, { "epoch": 0.9, "grad_norm": 5.358598232269287, "learning_rate": 1.6630862032351133e-06, "loss": 0.7778, "step": 6172 }, { "epoch": 0.9, "grad_norm": 5.380890369415283, "learning_rate": 1.6629677136050457e-06, "loss": 0.6861, "step": 6173 }, { "epoch": 0.9, "grad_norm": 5.072121620178223, "learning_rate": 1.6628492073653995e-06, "loss": 0.7443, "step": 6174 }, { "epoch": 0.9, "grad_norm": 5.5408477783203125, "learning_rate": 1.6627306845191428e-06, "loss": 0.7516, "step": 6175 }, { "epoch": 0.9, "grad_norm": 5.06044340133667, "learning_rate": 1.662612145069246e-06, "loss": 0.6972, "step": 6176 }, { "epoch": 0.9, "grad_norm": 5.800696849822998, "learning_rate": 1.6624935890186787e-06, "loss": 0.7036, "step": 6177 }, { "epoch": 0.9, "grad_norm": 4.977070331573486, "learning_rate": 1.6623750163704106e-06, "loss": 0.6703, "step": 6178 }, { "epoch": 0.9, "grad_norm": 5.513454437255859, "learning_rate": 1.6622564271274128e-06, "loss": 0.653, "step": 6179 }, { "epoch": 0.9, "grad_norm": 5.544332027435303, "learning_rate": 1.6621378212926565e-06, "loss": 0.6361, "step": 6180 }, { "epoch": 0.9, "grad_norm": 5.722692966461182, "learning_rate": 1.6620191988691129e-06, "loss": 0.6713, "step": 6181 }, { "epoch": 0.9, "grad_norm": 5.329401016235352, "learning_rate": 1.6619005598597538e-06, "loss": 0.712, "step": 6182 }, { "epoch": 0.9, "grad_norm": 4.993104457855225, "learning_rate": 1.6617819042675514e-06, "loss": 0.6655, "step": 6183 }, { "epoch": 0.9, "grad_norm": 5.780825614929199, "learning_rate": 1.6616632320954792e-06, "loss": 0.701, "step": 6184 }, { "epoch": 0.9, "grad_norm": 4.888779163360596, "learning_rate": 1.6615445433465095e-06, "loss": 0.6509, "step": 6185 }, { "epoch": 0.9, "grad_norm": 5.3320231437683105, "learning_rate": 1.6614258380236161e-06, "loss": 0.6726, "step": 6186 }, { "epoch": 0.9, "grad_norm": 5.556058883666992, "learning_rate": 1.6613071161297731e-06, "loss": 0.7756, "step": 6187 }, { "epoch": 0.9, "grad_norm": 5.08627462387085, "learning_rate": 1.6611883776679549e-06, "loss": 0.7031, "step": 6188 }, { "epoch": 0.9, "grad_norm": 5.243672847747803, "learning_rate": 1.6610696226411362e-06, "loss": 0.6626, "step": 6189 }, { "epoch": 0.9, "grad_norm": 5.354911804199219, "learning_rate": 1.6609508510522922e-06, "loss": 0.7346, "step": 6190 }, { "epoch": 0.9, "grad_norm": 4.927003383636475, "learning_rate": 1.6608320629043986e-06, "loss": 0.7535, "step": 6191 }, { "epoch": 0.9, "grad_norm": 5.171302318572998, "learning_rate": 1.6607132582004316e-06, "loss": 0.6829, "step": 6192 }, { "epoch": 0.9, "grad_norm": 5.144439697265625, "learning_rate": 1.660594436943367e-06, "loss": 0.6881, "step": 6193 }, { "epoch": 0.9, "grad_norm": 5.421311855316162, "learning_rate": 1.6604755991361823e-06, "loss": 0.7452, "step": 6194 }, { "epoch": 0.9, "grad_norm": 5.291186809539795, "learning_rate": 1.6603567447818546e-06, "loss": 0.6751, "step": 6195 }, { "epoch": 0.9, "grad_norm": 5.397345542907715, "learning_rate": 1.6602378738833618e-06, "loss": 0.6443, "step": 6196 }, { "epoch": 0.9, "grad_norm": 5.671578884124756, "learning_rate": 1.6601189864436818e-06, "loss": 0.733, "step": 6197 }, { "epoch": 0.9, "grad_norm": 5.5891828536987305, "learning_rate": 1.6600000824657932e-06, "loss": 0.7283, "step": 6198 }, { "epoch": 0.9, "grad_norm": 5.957378387451172, "learning_rate": 1.6598811619526746e-06, "loss": 0.7459, "step": 6199 }, { "epoch": 0.9, "grad_norm": 5.549872398376465, "learning_rate": 1.6597622249073063e-06, "loss": 0.7109, "step": 6200 }, { "epoch": 0.9, "grad_norm": 4.843992710113525, "learning_rate": 1.6596432713326668e-06, "loss": 0.6261, "step": 6201 }, { "epoch": 0.9, "grad_norm": 5.305296897888184, "learning_rate": 1.6595243012317375e-06, "loss": 0.652, "step": 6202 }, { "epoch": 0.9, "grad_norm": 5.408359527587891, "learning_rate": 1.6594053146074981e-06, "loss": 0.6632, "step": 6203 }, { "epoch": 0.9, "grad_norm": 5.157795429229736, "learning_rate": 1.65928631146293e-06, "loss": 0.6394, "step": 6204 }, { "epoch": 0.9, "grad_norm": 5.59539270401001, "learning_rate": 1.6591672918010147e-06, "loss": 0.6706, "step": 6205 }, { "epoch": 0.9, "grad_norm": 5.697614669799805, "learning_rate": 1.6590482556247337e-06, "loss": 0.6661, "step": 6206 }, { "epoch": 0.9, "grad_norm": 4.845430374145508, "learning_rate": 1.6589292029370698e-06, "loss": 0.612, "step": 6207 }, { "epoch": 0.9, "grad_norm": 6.442322731018066, "learning_rate": 1.6588101337410053e-06, "loss": 0.7375, "step": 6208 }, { "epoch": 0.9, "grad_norm": 5.5753912925720215, "learning_rate": 1.6586910480395232e-06, "loss": 0.7413, "step": 6209 }, { "epoch": 0.9, "grad_norm": 5.614189624786377, "learning_rate": 1.6585719458356074e-06, "loss": 0.738, "step": 6210 }, { "epoch": 0.9, "grad_norm": 6.056485652923584, "learning_rate": 1.6584528271322413e-06, "loss": 0.6027, "step": 6211 }, { "epoch": 0.9, "grad_norm": 5.099733352661133, "learning_rate": 1.6583336919324098e-06, "loss": 0.6017, "step": 6212 }, { "epoch": 0.9, "grad_norm": 5.68830680847168, "learning_rate": 1.658214540239097e-06, "loss": 0.7539, "step": 6213 }, { "epoch": 0.9, "grad_norm": 5.892282485961914, "learning_rate": 1.6580953720552886e-06, "loss": 0.6924, "step": 6214 }, { "epoch": 0.9, "grad_norm": 5.789851665496826, "learning_rate": 1.6579761873839697e-06, "loss": 0.7654, "step": 6215 }, { "epoch": 0.9, "grad_norm": 6.6185126304626465, "learning_rate": 1.6578569862281268e-06, "loss": 0.7801, "step": 6216 }, { "epoch": 0.9, "grad_norm": 5.067729473114014, "learning_rate": 1.6577377685907458e-06, "loss": 0.6781, "step": 6217 }, { "epoch": 0.9, "grad_norm": 5.16430139541626, "learning_rate": 1.657618534474814e-06, "loss": 0.6549, "step": 6218 }, { "epoch": 0.9, "grad_norm": 5.371513366699219, "learning_rate": 1.6574992838833182e-06, "loss": 0.6693, "step": 6219 }, { "epoch": 0.9, "grad_norm": 5.501106262207031, "learning_rate": 1.657380016819246e-06, "loss": 0.7825, "step": 6220 }, { "epoch": 0.9, "grad_norm": 6.272478103637695, "learning_rate": 1.6572607332855854e-06, "loss": 0.6867, "step": 6221 }, { "epoch": 0.9, "grad_norm": 5.2721734046936035, "learning_rate": 1.6571414332853254e-06, "loss": 0.6812, "step": 6222 }, { "epoch": 0.9, "grad_norm": 6.036683559417725, "learning_rate": 1.6570221168214546e-06, "loss": 0.7488, "step": 6223 }, { "epoch": 0.9, "grad_norm": 5.6956562995910645, "learning_rate": 1.6569027838969622e-06, "loss": 0.7307, "step": 6224 }, { "epoch": 0.9, "grad_norm": 5.741225242614746, "learning_rate": 1.6567834345148376e-06, "loss": 0.6876, "step": 6225 }, { "epoch": 0.9, "grad_norm": 6.203975677490234, "learning_rate": 1.6566640686780717e-06, "loss": 0.7165, "step": 6226 }, { "epoch": 0.9, "grad_norm": 5.402624130249023, "learning_rate": 1.6565446863896543e-06, "loss": 0.8138, "step": 6227 }, { "epoch": 0.9, "grad_norm": 5.742602825164795, "learning_rate": 1.6564252876525765e-06, "loss": 0.7664, "step": 6228 }, { "epoch": 0.9, "grad_norm": 5.940164566040039, "learning_rate": 1.6563058724698297e-06, "loss": 0.6981, "step": 6229 }, { "epoch": 0.9, "grad_norm": 4.9898200035095215, "learning_rate": 1.6561864408444057e-06, "loss": 0.7405, "step": 6230 }, { "epoch": 0.9, "grad_norm": 5.2126946449279785, "learning_rate": 1.6560669927792966e-06, "loss": 0.7086, "step": 6231 }, { "epoch": 0.9, "grad_norm": 6.235173225402832, "learning_rate": 1.6559475282774951e-06, "loss": 0.8122, "step": 6232 }, { "epoch": 0.9, "grad_norm": 4.874124526977539, "learning_rate": 1.6558280473419939e-06, "loss": 0.6713, "step": 6233 }, { "epoch": 0.9, "grad_norm": 5.426108360290527, "learning_rate": 1.6557085499757867e-06, "loss": 0.7022, "step": 6234 }, { "epoch": 0.9, "grad_norm": 6.315128803253174, "learning_rate": 1.6555890361818672e-06, "loss": 0.7421, "step": 6235 }, { "epoch": 0.9, "grad_norm": 5.7547807693481445, "learning_rate": 1.6554695059632294e-06, "loss": 0.6536, "step": 6236 }, { "epoch": 0.9, "grad_norm": 5.553038120269775, "learning_rate": 1.6553499593228683e-06, "loss": 0.759, "step": 6237 }, { "epoch": 0.91, "grad_norm": 5.956334590911865, "learning_rate": 1.655230396263779e-06, "loss": 0.6788, "step": 6238 }, { "epoch": 0.91, "grad_norm": 5.295639991760254, "learning_rate": 1.6551108167889564e-06, "loss": 0.6826, "step": 6239 }, { "epoch": 0.91, "grad_norm": 5.689136981964111, "learning_rate": 1.654991220901397e-06, "loss": 0.7592, "step": 6240 }, { "epoch": 0.91, "grad_norm": 5.406241416931152, "learning_rate": 1.6548716086040969e-06, "loss": 0.6378, "step": 6241 }, { "epoch": 0.91, "grad_norm": 5.454592704772949, "learning_rate": 1.6547519799000521e-06, "loss": 0.6882, "step": 6242 }, { "epoch": 0.91, "grad_norm": 5.433190822601318, "learning_rate": 1.6546323347922608e-06, "loss": 0.7421, "step": 6243 }, { "epoch": 0.91, "grad_norm": 5.386066436767578, "learning_rate": 1.6545126732837202e-06, "loss": 0.7139, "step": 6244 }, { "epoch": 0.91, "grad_norm": 5.819482803344727, "learning_rate": 1.6543929953774276e-06, "loss": 0.655, "step": 6245 }, { "epoch": 0.91, "grad_norm": 5.143409252166748, "learning_rate": 1.654273301076382e-06, "loss": 0.7271, "step": 6246 }, { "epoch": 0.91, "grad_norm": 5.9324517250061035, "learning_rate": 1.654153590383582e-06, "loss": 0.6564, "step": 6247 }, { "epoch": 0.91, "grad_norm": 5.702627182006836, "learning_rate": 1.6540338633020264e-06, "loss": 0.7122, "step": 6248 }, { "epoch": 0.91, "grad_norm": 5.646642684936523, "learning_rate": 1.6539141198347152e-06, "loss": 0.7226, "step": 6249 }, { "epoch": 0.91, "grad_norm": 5.542426586151123, "learning_rate": 1.6537943599846482e-06, "loss": 0.7364, "step": 6250 }, { "epoch": 0.91, "grad_norm": 6.477505683898926, "learning_rate": 1.6536745837548257e-06, "loss": 0.7659, "step": 6251 }, { "epoch": 0.91, "grad_norm": 5.587581157684326, "learning_rate": 1.6535547911482491e-06, "loss": 0.7731, "step": 6252 }, { "epoch": 0.91, "grad_norm": 5.986892223358154, "learning_rate": 1.6534349821679187e-06, "loss": 0.776, "step": 6253 }, { "epoch": 0.91, "grad_norm": 5.457764625549316, "learning_rate": 1.6533151568168367e-06, "loss": 0.6691, "step": 6254 }, { "epoch": 0.91, "grad_norm": 6.20581579208374, "learning_rate": 1.6531953150980046e-06, "loss": 0.8037, "step": 6255 }, { "epoch": 0.91, "grad_norm": 5.715847492218018, "learning_rate": 1.6530754570144257e-06, "loss": 0.6657, "step": 6256 }, { "epoch": 0.91, "grad_norm": 5.389403343200684, "learning_rate": 1.652955582569102e-06, "loss": 0.7014, "step": 6257 }, { "epoch": 0.91, "grad_norm": 5.0609917640686035, "learning_rate": 1.6528356917650377e-06, "loss": 0.6407, "step": 6258 }, { "epoch": 0.91, "grad_norm": 4.941007614135742, "learning_rate": 1.6527157846052353e-06, "loss": 0.69, "step": 6259 }, { "epoch": 0.91, "grad_norm": 6.412765979766846, "learning_rate": 1.6525958610926995e-06, "loss": 0.6874, "step": 6260 }, { "epoch": 0.91, "grad_norm": 5.573130130767822, "learning_rate": 1.6524759212304352e-06, "loss": 0.7051, "step": 6261 }, { "epoch": 0.91, "grad_norm": 5.577422142028809, "learning_rate": 1.6523559650214465e-06, "loss": 0.7468, "step": 6262 }, { "epoch": 0.91, "grad_norm": 5.477621078491211, "learning_rate": 1.652235992468739e-06, "loss": 0.6766, "step": 6263 }, { "epoch": 0.91, "grad_norm": 4.8494415283203125, "learning_rate": 1.6521160035753185e-06, "loss": 0.7149, "step": 6264 }, { "epoch": 0.91, "grad_norm": 5.236220359802246, "learning_rate": 1.6519959983441915e-06, "loss": 0.6457, "step": 6265 }, { "epoch": 0.91, "grad_norm": 5.1208271980285645, "learning_rate": 1.6518759767783639e-06, "loss": 0.6738, "step": 6266 }, { "epoch": 0.91, "grad_norm": 5.327686786651611, "learning_rate": 1.6517559388808427e-06, "loss": 0.7055, "step": 6267 }, { "epoch": 0.91, "grad_norm": 5.434369087219238, "learning_rate": 1.6516358846546355e-06, "loss": 0.7191, "step": 6268 }, { "epoch": 0.91, "grad_norm": 5.918288230895996, "learning_rate": 1.6515158141027504e-06, "loss": 0.6595, "step": 6269 }, { "epoch": 0.91, "grad_norm": 5.888002872467041, "learning_rate": 1.6513957272281947e-06, "loss": 0.7185, "step": 6270 }, { "epoch": 0.91, "grad_norm": 5.5890793800354, "learning_rate": 1.6512756240339775e-06, "loss": 0.6655, "step": 6271 }, { "epoch": 0.91, "grad_norm": 6.347745418548584, "learning_rate": 1.651155504523108e-06, "loss": 0.7274, "step": 6272 }, { "epoch": 0.91, "grad_norm": 4.978382110595703, "learning_rate": 1.6510353686985954e-06, "loss": 0.6359, "step": 6273 }, { "epoch": 0.91, "grad_norm": 5.8357367515563965, "learning_rate": 1.650915216563449e-06, "loss": 0.6829, "step": 6274 }, { "epoch": 0.91, "grad_norm": 5.554832458496094, "learning_rate": 1.6507950481206797e-06, "loss": 0.7447, "step": 6275 }, { "epoch": 0.91, "grad_norm": 5.781216144561768, "learning_rate": 1.6506748633732981e-06, "loss": 0.6959, "step": 6276 }, { "epoch": 0.91, "grad_norm": 6.35982608795166, "learning_rate": 1.650554662324315e-06, "loss": 0.632, "step": 6277 }, { "epoch": 0.91, "grad_norm": 5.705060005187988, "learning_rate": 1.6504344449767418e-06, "loss": 0.6901, "step": 6278 }, { "epoch": 0.91, "grad_norm": 5.521439075469971, "learning_rate": 1.6503142113335905e-06, "loss": 0.7144, "step": 6279 }, { "epoch": 0.91, "grad_norm": 6.265161037445068, "learning_rate": 1.650193961397873e-06, "loss": 0.6974, "step": 6280 }, { "epoch": 0.91, "grad_norm": 5.641712665557861, "learning_rate": 1.6500736951726025e-06, "loss": 0.729, "step": 6281 }, { "epoch": 0.91, "grad_norm": 5.211374282836914, "learning_rate": 1.6499534126607916e-06, "loss": 0.63, "step": 6282 }, { "epoch": 0.91, "grad_norm": 5.876452445983887, "learning_rate": 1.6498331138654544e-06, "loss": 0.7288, "step": 6283 }, { "epoch": 0.91, "grad_norm": 5.324671268463135, "learning_rate": 1.649712798789604e-06, "loss": 0.6471, "step": 6284 }, { "epoch": 0.91, "grad_norm": 5.712238311767578, "learning_rate": 1.6495924674362554e-06, "loss": 0.632, "step": 6285 }, { "epoch": 0.91, "grad_norm": 5.2271575927734375, "learning_rate": 1.6494721198084229e-06, "loss": 0.7136, "step": 6286 }, { "epoch": 0.91, "grad_norm": 5.713454723358154, "learning_rate": 1.6493517559091218e-06, "loss": 0.7277, "step": 6287 }, { "epoch": 0.91, "grad_norm": 5.80276346206665, "learning_rate": 1.649231375741367e-06, "loss": 0.723, "step": 6288 }, { "epoch": 0.91, "grad_norm": 5.947843074798584, "learning_rate": 1.6491109793081757e-06, "loss": 0.7988, "step": 6289 }, { "epoch": 0.91, "grad_norm": 5.470602512359619, "learning_rate": 1.6489905666125635e-06, "loss": 0.8177, "step": 6290 }, { "epoch": 0.91, "grad_norm": 5.499958515167236, "learning_rate": 1.6488701376575466e-06, "loss": 0.7031, "step": 6291 }, { "epoch": 0.91, "grad_norm": 5.040362358093262, "learning_rate": 1.648749692446143e-06, "loss": 0.6844, "step": 6292 }, { "epoch": 0.91, "grad_norm": 5.127204418182373, "learning_rate": 1.6486292309813702e-06, "loss": 0.7382, "step": 6293 }, { "epoch": 0.91, "grad_norm": 5.422591686248779, "learning_rate": 1.6485087532662454e-06, "loss": 0.6587, "step": 6294 }, { "epoch": 0.91, "grad_norm": 5.32451868057251, "learning_rate": 1.648388259303788e-06, "loss": 0.7138, "step": 6295 }, { "epoch": 0.91, "grad_norm": 5.535366058349609, "learning_rate": 1.6482677490970157e-06, "loss": 0.658, "step": 6296 }, { "epoch": 0.91, "grad_norm": 5.898591041564941, "learning_rate": 1.6481472226489487e-06, "loss": 0.6943, "step": 6297 }, { "epoch": 0.91, "grad_norm": 5.264493942260742, "learning_rate": 1.6480266799626064e-06, "loss": 0.626, "step": 6298 }, { "epoch": 0.91, "grad_norm": 5.696106433868408, "learning_rate": 1.647906121041008e-06, "loss": 0.6684, "step": 6299 }, { "epoch": 0.91, "grad_norm": 5.956321716308594, "learning_rate": 1.6477855458871748e-06, "loss": 0.7155, "step": 6300 }, { "epoch": 0.91, "grad_norm": 4.6791815757751465, "learning_rate": 1.647664954504127e-06, "loss": 0.5843, "step": 6301 }, { "epoch": 0.91, "grad_norm": 6.248748779296875, "learning_rate": 1.6475443468948863e-06, "loss": 0.6681, "step": 6302 }, { "epoch": 0.91, "grad_norm": 6.050527572631836, "learning_rate": 1.6474237230624743e-06, "loss": 0.7113, "step": 6303 }, { "epoch": 0.91, "grad_norm": 5.337331771850586, "learning_rate": 1.6473030830099126e-06, "loss": 0.6553, "step": 6304 }, { "epoch": 0.91, "grad_norm": 5.588867664337158, "learning_rate": 1.6471824267402243e-06, "loss": 0.681, "step": 6305 }, { "epoch": 0.91, "grad_norm": 5.806641101837158, "learning_rate": 1.6470617542564316e-06, "loss": 0.767, "step": 6306 }, { "epoch": 0.92, "grad_norm": 5.515982151031494, "learning_rate": 1.646941065561558e-06, "loss": 0.696, "step": 6307 }, { "epoch": 0.92, "grad_norm": 5.177097320556641, "learning_rate": 1.6468203606586272e-06, "loss": 0.6837, "step": 6308 }, { "epoch": 0.92, "grad_norm": 5.156691074371338, "learning_rate": 1.6466996395506632e-06, "loss": 0.6902, "step": 6309 }, { "epoch": 0.92, "grad_norm": 5.833051681518555, "learning_rate": 1.6465789022406905e-06, "loss": 0.7358, "step": 6310 }, { "epoch": 0.92, "grad_norm": 5.417111396789551, "learning_rate": 1.646458148731734e-06, "loss": 0.7091, "step": 6311 }, { "epoch": 0.92, "grad_norm": 5.069554805755615, "learning_rate": 1.6463373790268192e-06, "loss": 0.6839, "step": 6312 }, { "epoch": 0.92, "grad_norm": 5.649518966674805, "learning_rate": 1.646216593128971e-06, "loss": 0.6854, "step": 6313 }, { "epoch": 0.92, "grad_norm": 5.235158920288086, "learning_rate": 1.6460957910412167e-06, "loss": 0.707, "step": 6314 }, { "epoch": 0.92, "grad_norm": 5.461526393890381, "learning_rate": 1.6459749727665818e-06, "loss": 0.7147, "step": 6315 }, { "epoch": 0.92, "grad_norm": 5.022187232971191, "learning_rate": 1.6458541383080935e-06, "loss": 0.6394, "step": 6316 }, { "epoch": 0.92, "grad_norm": 6.139946937561035, "learning_rate": 1.6457332876687792e-06, "loss": 0.6025, "step": 6317 }, { "epoch": 0.92, "grad_norm": 4.76528263092041, "learning_rate": 1.6456124208516668e-06, "loss": 0.6349, "step": 6318 }, { "epoch": 0.92, "grad_norm": 4.919158458709717, "learning_rate": 1.645491537859784e-06, "loss": 0.639, "step": 6319 }, { "epoch": 0.92, "grad_norm": 6.522891521453857, "learning_rate": 1.6453706386961596e-06, "loss": 0.7957, "step": 6320 }, { "epoch": 0.92, "grad_norm": 5.9951558113098145, "learning_rate": 1.6452497233638225e-06, "loss": 0.7232, "step": 6321 }, { "epoch": 0.92, "grad_norm": 5.7017059326171875, "learning_rate": 1.6451287918658018e-06, "loss": 0.6965, "step": 6322 }, { "epoch": 0.92, "grad_norm": 4.995896339416504, "learning_rate": 1.6450078442051279e-06, "loss": 0.7279, "step": 6323 }, { "epoch": 0.92, "grad_norm": 5.442657947540283, "learning_rate": 1.64488688038483e-06, "loss": 0.6558, "step": 6324 }, { "epoch": 0.92, "grad_norm": 5.361165523529053, "learning_rate": 1.6447659004079394e-06, "loss": 0.6636, "step": 6325 }, { "epoch": 0.92, "grad_norm": 6.361767768859863, "learning_rate": 1.6446449042774867e-06, "loss": 0.7252, "step": 6326 }, { "epoch": 0.92, "grad_norm": 6.509598255157471, "learning_rate": 1.6445238919965034e-06, "loss": 0.7854, "step": 6327 }, { "epoch": 0.92, "grad_norm": 5.245198726654053, "learning_rate": 1.6444028635680211e-06, "loss": 0.6699, "step": 6328 }, { "epoch": 0.92, "grad_norm": 4.831419944763184, "learning_rate": 1.6442818189950725e-06, "loss": 0.7001, "step": 6329 }, { "epoch": 0.92, "grad_norm": 5.558091640472412, "learning_rate": 1.6441607582806894e-06, "loss": 0.7162, "step": 6330 }, { "epoch": 0.92, "grad_norm": 5.36847448348999, "learning_rate": 1.6440396814279053e-06, "loss": 0.756, "step": 6331 }, { "epoch": 0.92, "grad_norm": 5.140235424041748, "learning_rate": 1.6439185884397534e-06, "loss": 0.6933, "step": 6332 }, { "epoch": 0.92, "grad_norm": 5.673369884490967, "learning_rate": 1.6437974793192676e-06, "loss": 0.6997, "step": 6333 }, { "epoch": 0.92, "grad_norm": 5.954569339752197, "learning_rate": 1.6436763540694817e-06, "loss": 0.7118, "step": 6334 }, { "epoch": 0.92, "grad_norm": 5.4012956619262695, "learning_rate": 1.643555212693431e-06, "loss": 0.7226, "step": 6335 }, { "epoch": 0.92, "grad_norm": 5.497725486755371, "learning_rate": 1.6434340551941497e-06, "loss": 0.7324, "step": 6336 }, { "epoch": 0.92, "grad_norm": 5.417603969573975, "learning_rate": 1.643312881574674e-06, "loss": 0.7131, "step": 6337 }, { "epoch": 0.92, "grad_norm": 5.950793266296387, "learning_rate": 1.643191691838039e-06, "loss": 0.741, "step": 6338 }, { "epoch": 0.92, "grad_norm": 5.752642631530762, "learning_rate": 1.6430704859872817e-06, "loss": 0.7055, "step": 6339 }, { "epoch": 0.92, "grad_norm": 5.537550926208496, "learning_rate": 1.6429492640254378e-06, "loss": 0.6696, "step": 6340 }, { "epoch": 0.92, "grad_norm": 5.326741695404053, "learning_rate": 1.6428280259555452e-06, "loss": 0.7549, "step": 6341 }, { "epoch": 0.92, "grad_norm": 5.79264497756958, "learning_rate": 1.6427067717806407e-06, "loss": 0.7654, "step": 6342 }, { "epoch": 0.92, "grad_norm": 5.3260884284973145, "learning_rate": 1.642585501503762e-06, "loss": 0.6697, "step": 6343 }, { "epoch": 0.92, "grad_norm": 5.967458724975586, "learning_rate": 1.642464215127948e-06, "loss": 0.7312, "step": 6344 }, { "epoch": 0.92, "grad_norm": 5.591574192047119, "learning_rate": 1.6423429126562368e-06, "loss": 0.677, "step": 6345 }, { "epoch": 0.92, "grad_norm": 5.699549198150635, "learning_rate": 1.642221594091668e-06, "loss": 0.7235, "step": 6346 }, { "epoch": 0.92, "grad_norm": 5.723748207092285, "learning_rate": 1.6421002594372802e-06, "loss": 0.747, "step": 6347 }, { "epoch": 0.92, "grad_norm": 5.419236183166504, "learning_rate": 1.641978908696114e-06, "loss": 0.6224, "step": 6348 }, { "epoch": 0.92, "grad_norm": 5.501382350921631, "learning_rate": 1.6418575418712093e-06, "loss": 0.6412, "step": 6349 }, { "epoch": 0.92, "grad_norm": 5.630717754364014, "learning_rate": 1.6417361589656067e-06, "loss": 0.7423, "step": 6350 }, { "epoch": 0.92, "grad_norm": 5.880874156951904, "learning_rate": 1.6416147599823473e-06, "loss": 0.7378, "step": 6351 }, { "epoch": 0.92, "grad_norm": 7.737728118896484, "learning_rate": 1.6414933449244726e-06, "loss": 0.7676, "step": 6352 }, { "epoch": 0.92, "grad_norm": 5.717639923095703, "learning_rate": 1.6413719137950246e-06, "loss": 0.683, "step": 6353 }, { "epoch": 0.92, "grad_norm": 5.600001335144043, "learning_rate": 1.6412504665970453e-06, "loss": 0.7282, "step": 6354 }, { "epoch": 0.92, "grad_norm": 5.301677703857422, "learning_rate": 1.6411290033335776e-06, "loss": 0.7191, "step": 6355 }, { "epoch": 0.92, "grad_norm": 5.494040489196777, "learning_rate": 1.6410075240076644e-06, "loss": 0.7974, "step": 6356 }, { "epoch": 0.92, "grad_norm": 5.698023319244385, "learning_rate": 1.6408860286223494e-06, "loss": 0.7041, "step": 6357 }, { "epoch": 0.92, "grad_norm": 5.266940593719482, "learning_rate": 1.640764517180676e-06, "loss": 0.7556, "step": 6358 }, { "epoch": 0.92, "grad_norm": 5.192230224609375, "learning_rate": 1.6406429896856888e-06, "loss": 0.6644, "step": 6359 }, { "epoch": 0.92, "grad_norm": 5.573678016662598, "learning_rate": 1.6405214461404326e-06, "loss": 0.7018, "step": 6360 }, { "epoch": 0.92, "grad_norm": 5.3759074211120605, "learning_rate": 1.640399886547952e-06, "loss": 0.6535, "step": 6361 }, { "epoch": 0.92, "grad_norm": 5.312867641448975, "learning_rate": 1.640278310911293e-06, "loss": 0.655, "step": 6362 }, { "epoch": 0.92, "grad_norm": 5.199551582336426, "learning_rate": 1.6401567192335014e-06, "loss": 0.6318, "step": 6363 }, { "epoch": 0.92, "grad_norm": 6.2904181480407715, "learning_rate": 1.6400351115176233e-06, "loss": 0.7161, "step": 6364 }, { "epoch": 0.92, "grad_norm": 5.903825283050537, "learning_rate": 1.639913487766705e-06, "loss": 0.7224, "step": 6365 }, { "epoch": 0.92, "grad_norm": 5.3053460121154785, "learning_rate": 1.6397918479837944e-06, "loss": 0.6729, "step": 6366 }, { "epoch": 0.92, "grad_norm": 5.463495254516602, "learning_rate": 1.6396701921719384e-06, "loss": 0.7206, "step": 6367 }, { "epoch": 0.92, "grad_norm": 5.3901143074035645, "learning_rate": 1.6395485203341854e-06, "loss": 0.6821, "step": 6368 }, { "epoch": 0.92, "grad_norm": 5.373685836791992, "learning_rate": 1.6394268324735832e-06, "loss": 0.7146, "step": 6369 }, { "epoch": 0.92, "grad_norm": 5.727716445922852, "learning_rate": 1.6393051285931807e-06, "loss": 0.7317, "step": 6370 }, { "epoch": 0.92, "grad_norm": 5.518399715423584, "learning_rate": 1.6391834086960267e-06, "loss": 0.694, "step": 6371 }, { "epoch": 0.92, "grad_norm": 5.747856616973877, "learning_rate": 1.6390616727851714e-06, "loss": 0.7558, "step": 6372 }, { "epoch": 0.92, "grad_norm": 5.272909641265869, "learning_rate": 1.638939920863664e-06, "loss": 0.7009, "step": 6373 }, { "epoch": 0.92, "grad_norm": 5.611332893371582, "learning_rate": 1.6388181529345552e-06, "loss": 0.769, "step": 6374 }, { "epoch": 0.92, "grad_norm": 5.452805519104004, "learning_rate": 1.6386963690008955e-06, "loss": 0.8044, "step": 6375 }, { "epoch": 0.93, "grad_norm": 5.399041652679443, "learning_rate": 1.638574569065736e-06, "loss": 0.7093, "step": 6376 }, { "epoch": 0.93, "grad_norm": 5.63832426071167, "learning_rate": 1.6384527531321281e-06, "loss": 0.6863, "step": 6377 }, { "epoch": 0.93, "grad_norm": 5.802353858947754, "learning_rate": 1.6383309212031237e-06, "loss": 0.7485, "step": 6378 }, { "epoch": 0.93, "grad_norm": 5.252549171447754, "learning_rate": 1.6382090732817757e-06, "loss": 0.6755, "step": 6379 }, { "epoch": 0.93, "grad_norm": 5.760387897491455, "learning_rate": 1.6380872093711363e-06, "loss": 0.715, "step": 6380 }, { "epoch": 0.93, "grad_norm": 6.0060248374938965, "learning_rate": 1.6379653294742583e-06, "loss": 0.7677, "step": 6381 }, { "epoch": 0.93, "grad_norm": 5.21674108505249, "learning_rate": 1.637843433594196e-06, "loss": 0.743, "step": 6382 }, { "epoch": 0.93, "grad_norm": 5.547600746154785, "learning_rate": 1.6377215217340026e-06, "loss": 0.6286, "step": 6383 }, { "epoch": 0.93, "grad_norm": 4.575716495513916, "learning_rate": 1.6375995938967327e-06, "loss": 0.6378, "step": 6384 }, { "epoch": 0.93, "grad_norm": 5.119490623474121, "learning_rate": 1.637477650085441e-06, "loss": 0.6864, "step": 6385 }, { "epoch": 0.93, "grad_norm": 5.115571975708008, "learning_rate": 1.6373556903031825e-06, "loss": 0.7358, "step": 6386 }, { "epoch": 0.93, "grad_norm": 5.513307571411133, "learning_rate": 1.6372337145530129e-06, "loss": 0.6557, "step": 6387 }, { "epoch": 0.93, "grad_norm": 5.7813215255737305, "learning_rate": 1.6371117228379878e-06, "loss": 0.7763, "step": 6388 }, { "epoch": 0.93, "grad_norm": 6.061863899230957, "learning_rate": 1.6369897151611637e-06, "loss": 0.7349, "step": 6389 }, { "epoch": 0.93, "grad_norm": 5.272605895996094, "learning_rate": 1.6368676915255975e-06, "loss": 0.6927, "step": 6390 }, { "epoch": 0.93, "grad_norm": 5.783840656280518, "learning_rate": 1.6367456519343458e-06, "loss": 0.7039, "step": 6391 }, { "epoch": 0.93, "grad_norm": 6.561422348022461, "learning_rate": 1.6366235963904667e-06, "loss": 0.8345, "step": 6392 }, { "epoch": 0.93, "grad_norm": 5.546324729919434, "learning_rate": 1.6365015248970176e-06, "loss": 0.6889, "step": 6393 }, { "epoch": 0.93, "grad_norm": 5.828275680541992, "learning_rate": 1.636379437457057e-06, "loss": 0.7248, "step": 6394 }, { "epoch": 0.93, "grad_norm": 5.5828070640563965, "learning_rate": 1.6362573340736437e-06, "loss": 0.7214, "step": 6395 }, { "epoch": 0.93, "grad_norm": 5.42747163772583, "learning_rate": 1.6361352147498367e-06, "loss": 0.7084, "step": 6396 }, { "epoch": 0.93, "grad_norm": 4.896553993225098, "learning_rate": 1.6360130794886952e-06, "loss": 0.6179, "step": 6397 }, { "epoch": 0.93, "grad_norm": 5.976197719573975, "learning_rate": 1.6358909282932799e-06, "loss": 0.7187, "step": 6398 }, { "epoch": 0.93, "grad_norm": 6.002241134643555, "learning_rate": 1.6357687611666503e-06, "loss": 0.6314, "step": 6399 }, { "epoch": 0.93, "grad_norm": 5.234002590179443, "learning_rate": 1.6356465781118675e-06, "loss": 0.6905, "step": 6400 }, { "epoch": 0.93, "grad_norm": 4.961750507354736, "learning_rate": 1.6355243791319926e-06, "loss": 0.6733, "step": 6401 }, { "epoch": 0.93, "grad_norm": 5.487204074859619, "learning_rate": 1.6354021642300868e-06, "loss": 0.7705, "step": 6402 }, { "epoch": 0.93, "grad_norm": 6.008970260620117, "learning_rate": 1.6352799334092122e-06, "loss": 0.7353, "step": 6403 }, { "epoch": 0.93, "grad_norm": 5.933187961578369, "learning_rate": 1.635157686672431e-06, "loss": 0.7293, "step": 6404 }, { "epoch": 0.93, "grad_norm": 6.004855632781982, "learning_rate": 1.6350354240228061e-06, "loss": 0.6699, "step": 6405 }, { "epoch": 0.93, "grad_norm": 5.63867712020874, "learning_rate": 1.6349131454634004e-06, "loss": 0.6601, "step": 6406 }, { "epoch": 0.93, "grad_norm": 5.652961730957031, "learning_rate": 1.6347908509972776e-06, "loss": 0.7278, "step": 6407 }, { "epoch": 0.93, "grad_norm": 4.9498982429504395, "learning_rate": 1.6346685406275015e-06, "loss": 0.6466, "step": 6408 }, { "epoch": 0.93, "grad_norm": 5.687402725219727, "learning_rate": 1.6345462143571363e-06, "loss": 0.6913, "step": 6409 }, { "epoch": 0.93, "grad_norm": 5.844446659088135, "learning_rate": 1.634423872189247e-06, "loss": 0.7248, "step": 6410 }, { "epoch": 0.93, "grad_norm": 5.333186149597168, "learning_rate": 1.6343015141268982e-06, "loss": 0.7638, "step": 6411 }, { "epoch": 0.93, "grad_norm": 6.040863990783691, "learning_rate": 1.6341791401731552e-06, "loss": 0.7219, "step": 6412 }, { "epoch": 0.93, "grad_norm": 5.239193439483643, "learning_rate": 1.6340567503310844e-06, "loss": 0.7266, "step": 6413 }, { "epoch": 0.93, "grad_norm": 5.253554344177246, "learning_rate": 1.6339343446037526e-06, "loss": 0.7298, "step": 6414 }, { "epoch": 0.93, "grad_norm": 5.34597635269165, "learning_rate": 1.633811922994225e-06, "loss": 0.6654, "step": 6415 }, { "epoch": 0.93, "grad_norm": 5.764249801635742, "learning_rate": 1.63368948550557e-06, "loss": 0.6572, "step": 6416 }, { "epoch": 0.93, "grad_norm": 5.490586280822754, "learning_rate": 1.6335670321408546e-06, "loss": 0.6558, "step": 6417 }, { "epoch": 0.93, "grad_norm": 5.701415538787842, "learning_rate": 1.6334445629031465e-06, "loss": 0.7297, "step": 6418 }, { "epoch": 0.93, "grad_norm": 5.527210235595703, "learning_rate": 1.6333220777955141e-06, "loss": 0.7496, "step": 6419 }, { "epoch": 0.93, "grad_norm": 6.287514686584473, "learning_rate": 1.6331995768210261e-06, "loss": 0.7525, "step": 6420 }, { "epoch": 0.93, "grad_norm": 5.7158122062683105, "learning_rate": 1.6330770599827517e-06, "loss": 0.7247, "step": 6421 }, { "epoch": 0.93, "grad_norm": 5.4844465255737305, "learning_rate": 1.6329545272837598e-06, "loss": 0.7344, "step": 6422 }, { "epoch": 0.93, "grad_norm": 5.492530345916748, "learning_rate": 1.632831978727121e-06, "loss": 0.6962, "step": 6423 }, { "epoch": 0.93, "grad_norm": 5.137549877166748, "learning_rate": 1.6327094143159053e-06, "loss": 0.6554, "step": 6424 }, { "epoch": 0.93, "grad_norm": 7.051198959350586, "learning_rate": 1.632586834053183e-06, "loss": 0.6879, "step": 6425 }, { "epoch": 0.93, "grad_norm": 5.279117107391357, "learning_rate": 1.6324642379420254e-06, "loss": 0.7245, "step": 6426 }, { "epoch": 0.93, "grad_norm": 5.504046440124512, "learning_rate": 1.6323416259855041e-06, "loss": 0.7101, "step": 6427 }, { "epoch": 0.93, "grad_norm": 5.480838298797607, "learning_rate": 1.632218998186691e-06, "loss": 0.6773, "step": 6428 }, { "epoch": 0.93, "grad_norm": 6.112246513366699, "learning_rate": 1.6320963545486577e-06, "loss": 0.7527, "step": 6429 }, { "epoch": 0.93, "grad_norm": 5.446925163269043, "learning_rate": 1.6319736950744773e-06, "loss": 0.7, "step": 6430 }, { "epoch": 0.93, "grad_norm": 4.8650221824646, "learning_rate": 1.6318510197672233e-06, "loss": 0.6262, "step": 6431 }, { "epoch": 0.93, "grad_norm": 5.783127307891846, "learning_rate": 1.6317283286299683e-06, "loss": 0.7236, "step": 6432 }, { "epoch": 0.93, "grad_norm": 6.301671028137207, "learning_rate": 1.6316056216657865e-06, "loss": 0.7654, "step": 6433 }, { "epoch": 0.93, "grad_norm": 5.211878299713135, "learning_rate": 1.6314828988777523e-06, "loss": 0.6425, "step": 6434 }, { "epoch": 0.93, "grad_norm": 5.600496768951416, "learning_rate": 1.63136016026894e-06, "loss": 0.7, "step": 6435 }, { "epoch": 0.93, "grad_norm": 5.8686113357543945, "learning_rate": 1.6312374058424245e-06, "loss": 0.7786, "step": 6436 }, { "epoch": 0.93, "grad_norm": 5.314369201660156, "learning_rate": 1.631114635601282e-06, "loss": 0.6679, "step": 6437 }, { "epoch": 0.93, "grad_norm": 6.047618865966797, "learning_rate": 1.6309918495485874e-06, "loss": 0.7481, "step": 6438 }, { "epoch": 0.93, "grad_norm": 5.389162063598633, "learning_rate": 1.6308690476874172e-06, "loss": 0.7013, "step": 6439 }, { "epoch": 0.93, "grad_norm": 5.612423896789551, "learning_rate": 1.6307462300208481e-06, "loss": 0.6647, "step": 6440 }, { "epoch": 0.93, "grad_norm": 5.385460376739502, "learning_rate": 1.6306233965519572e-06, "loss": 0.7263, "step": 6441 }, { "epoch": 0.93, "grad_norm": 6.441549777984619, "learning_rate": 1.6305005472838218e-06, "loss": 0.7009, "step": 6442 }, { "epoch": 0.93, "grad_norm": 5.846566677093506, "learning_rate": 1.6303776822195194e-06, "loss": 0.7149, "step": 6443 }, { "epoch": 0.93, "grad_norm": 5.660891532897949, "learning_rate": 1.6302548013621285e-06, "loss": 0.7521, "step": 6444 }, { "epoch": 0.94, "grad_norm": 5.979224681854248, "learning_rate": 1.6301319047147276e-06, "loss": 0.6657, "step": 6445 }, { "epoch": 0.94, "grad_norm": 5.294317245483398, "learning_rate": 1.630008992280396e-06, "loss": 0.6869, "step": 6446 }, { "epoch": 0.94, "grad_norm": 5.745062351226807, "learning_rate": 1.6298860640622124e-06, "loss": 0.7103, "step": 6447 }, { "epoch": 0.94, "grad_norm": 5.818003177642822, "learning_rate": 1.6297631200632572e-06, "loss": 0.7335, "step": 6448 }, { "epoch": 0.94, "grad_norm": 5.602212429046631, "learning_rate": 1.62964016028661e-06, "loss": 0.6757, "step": 6449 }, { "epoch": 0.94, "grad_norm": 6.092465400695801, "learning_rate": 1.6295171847353518e-06, "loss": 0.7284, "step": 6450 }, { "epoch": 0.94, "grad_norm": 5.6618194580078125, "learning_rate": 1.6293941934125633e-06, "loss": 0.6919, "step": 6451 }, { "epoch": 0.94, "grad_norm": 5.521683216094971, "learning_rate": 1.629271186321326e-06, "loss": 0.6808, "step": 6452 }, { "epoch": 0.94, "grad_norm": 5.971271514892578, "learning_rate": 1.6291481634647216e-06, "loss": 0.6724, "step": 6453 }, { "epoch": 0.94, "grad_norm": 4.7975873947143555, "learning_rate": 1.6290251248458321e-06, "loss": 0.705, "step": 6454 }, { "epoch": 0.94, "grad_norm": 6.263693332672119, "learning_rate": 1.6289020704677401e-06, "loss": 0.7201, "step": 6455 }, { "epoch": 0.94, "grad_norm": 5.894188404083252, "learning_rate": 1.6287790003335286e-06, "loss": 0.7092, "step": 6456 }, { "epoch": 0.94, "grad_norm": 5.638817310333252, "learning_rate": 1.628655914446281e-06, "loss": 0.7104, "step": 6457 }, { "epoch": 0.94, "grad_norm": 5.0731000900268555, "learning_rate": 1.6285328128090812e-06, "loss": 0.7215, "step": 6458 }, { "epoch": 0.94, "grad_norm": 6.1098713874816895, "learning_rate": 1.6284096954250126e-06, "loss": 0.6791, "step": 6459 }, { "epoch": 0.94, "grad_norm": 5.497915267944336, "learning_rate": 1.6282865622971604e-06, "loss": 0.7197, "step": 6460 }, { "epoch": 0.94, "grad_norm": 4.829292297363281, "learning_rate": 1.6281634134286093e-06, "loss": 0.71, "step": 6461 }, { "epoch": 0.94, "grad_norm": 4.838714599609375, "learning_rate": 1.6280402488224442e-06, "loss": 0.6244, "step": 6462 }, { "epoch": 0.94, "grad_norm": 5.903862953186035, "learning_rate": 1.6279170684817515e-06, "loss": 0.7168, "step": 6463 }, { "epoch": 0.94, "grad_norm": 5.349632740020752, "learning_rate": 1.6277938724096166e-06, "loss": 0.6817, "step": 6464 }, { "epoch": 0.94, "grad_norm": 5.073818683624268, "learning_rate": 1.6276706606091266e-06, "loss": 0.6008, "step": 6465 }, { "epoch": 0.94, "grad_norm": 5.646026611328125, "learning_rate": 1.6275474330833678e-06, "loss": 0.7189, "step": 6466 }, { "epoch": 0.94, "grad_norm": 5.609305381774902, "learning_rate": 1.6274241898354278e-06, "loss": 0.6862, "step": 6467 }, { "epoch": 0.94, "grad_norm": 5.340491771697998, "learning_rate": 1.6273009308683943e-06, "loss": 0.7469, "step": 6468 }, { "epoch": 0.94, "grad_norm": 5.35798454284668, "learning_rate": 1.627177656185355e-06, "loss": 0.735, "step": 6469 }, { "epoch": 0.94, "grad_norm": 5.5568718910217285, "learning_rate": 1.6270543657893987e-06, "loss": 0.6869, "step": 6470 }, { "epoch": 0.94, "grad_norm": 5.7900285720825195, "learning_rate": 1.6269310596836142e-06, "loss": 0.7828, "step": 6471 }, { "epoch": 0.94, "grad_norm": 5.348249912261963, "learning_rate": 1.6268077378710904e-06, "loss": 0.6865, "step": 6472 }, { "epoch": 0.94, "grad_norm": 5.761090278625488, "learning_rate": 1.6266844003549174e-06, "loss": 0.7273, "step": 6473 }, { "epoch": 0.94, "grad_norm": 4.935208320617676, "learning_rate": 1.626561047138185e-06, "loss": 0.5937, "step": 6474 }, { "epoch": 0.94, "grad_norm": 5.669658660888672, "learning_rate": 1.6264376782239838e-06, "loss": 0.6844, "step": 6475 }, { "epoch": 0.94, "grad_norm": 5.348635673522949, "learning_rate": 1.626314293615404e-06, "loss": 0.6783, "step": 6476 }, { "epoch": 0.94, "grad_norm": 5.813222408294678, "learning_rate": 1.6261908933155372e-06, "loss": 0.6554, "step": 6477 }, { "epoch": 0.94, "grad_norm": 5.21986722946167, "learning_rate": 1.6260674773274755e-06, "loss": 0.6236, "step": 6478 }, { "epoch": 0.94, "grad_norm": 5.513417720794678, "learning_rate": 1.62594404565431e-06, "loss": 0.7563, "step": 6479 }, { "epoch": 0.94, "grad_norm": 5.668037414550781, "learning_rate": 1.6258205982991338e-06, "loss": 0.6475, "step": 6480 }, { "epoch": 0.94, "grad_norm": 5.5237226486206055, "learning_rate": 1.6256971352650392e-06, "loss": 0.7094, "step": 6481 }, { "epoch": 0.94, "grad_norm": 5.428618907928467, "learning_rate": 1.6255736565551194e-06, "loss": 0.7015, "step": 6482 }, { "epoch": 0.94, "grad_norm": 4.85528039932251, "learning_rate": 1.6254501621724682e-06, "loss": 0.6438, "step": 6483 }, { "epoch": 0.94, "grad_norm": 5.7756524085998535, "learning_rate": 1.6253266521201794e-06, "loss": 0.7792, "step": 6484 }, { "epoch": 0.94, "grad_norm": 5.414736747741699, "learning_rate": 1.6252031264013475e-06, "loss": 0.7167, "step": 6485 }, { "epoch": 0.94, "grad_norm": 5.59665060043335, "learning_rate": 1.625079585019067e-06, "loss": 0.7398, "step": 6486 }, { "epoch": 0.94, "grad_norm": 6.127492904663086, "learning_rate": 1.624956027976433e-06, "loss": 0.7302, "step": 6487 }, { "epoch": 0.94, "grad_norm": 5.28858757019043, "learning_rate": 1.6248324552765418e-06, "loss": 0.6976, "step": 6488 }, { "epoch": 0.94, "grad_norm": 4.994749546051025, "learning_rate": 1.624708866922488e-06, "loss": 0.7218, "step": 6489 }, { "epoch": 0.94, "grad_norm": 6.118768215179443, "learning_rate": 1.6245852629173689e-06, "loss": 0.6432, "step": 6490 }, { "epoch": 0.94, "grad_norm": 5.888428688049316, "learning_rate": 1.6244616432642808e-06, "loss": 0.736, "step": 6491 }, { "epoch": 0.94, "grad_norm": 5.395687103271484, "learning_rate": 1.624338007966321e-06, "loss": 0.6612, "step": 6492 }, { "epoch": 0.94, "grad_norm": 5.072044849395752, "learning_rate": 1.6242143570265868e-06, "loss": 0.6494, "step": 6493 }, { "epoch": 0.94, "grad_norm": 5.619058609008789, "learning_rate": 1.6240906904481759e-06, "loss": 0.7099, "step": 6494 }, { "epoch": 0.94, "grad_norm": 5.695852279663086, "learning_rate": 1.623967008234187e-06, "loss": 0.8015, "step": 6495 }, { "epoch": 0.94, "grad_norm": 6.306382656097412, "learning_rate": 1.6238433103877185e-06, "loss": 0.7419, "step": 6496 }, { "epoch": 0.94, "grad_norm": 5.698299884796143, "learning_rate": 1.6237195969118695e-06, "loss": 0.6222, "step": 6497 }, { "epoch": 0.94, "grad_norm": 5.104763031005859, "learning_rate": 1.6235958678097395e-06, "loss": 0.6242, "step": 6498 }, { "epoch": 0.94, "grad_norm": 5.614286422729492, "learning_rate": 1.623472123084428e-06, "loss": 0.718, "step": 6499 }, { "epoch": 0.94, "grad_norm": 5.6339240074157715, "learning_rate": 1.6233483627390357e-06, "loss": 0.7314, "step": 6500 }, { "epoch": 0.94, "grad_norm": 6.253380298614502, "learning_rate": 1.6232245867766632e-06, "loss": 0.7777, "step": 6501 }, { "epoch": 0.94, "grad_norm": 6.356725692749023, "learning_rate": 1.623100795200411e-06, "loss": 0.6467, "step": 6502 }, { "epoch": 0.94, "grad_norm": 5.453893184661865, "learning_rate": 1.6229769880133807e-06, "loss": 0.6065, "step": 6503 }, { "epoch": 0.94, "grad_norm": 6.088276386260986, "learning_rate": 1.6228531652186747e-06, "loss": 0.7473, "step": 6504 }, { "epoch": 0.94, "grad_norm": 5.774555206298828, "learning_rate": 1.6227293268193943e-06, "loss": 0.7613, "step": 6505 }, { "epoch": 0.94, "grad_norm": 5.891444206237793, "learning_rate": 1.6226054728186424e-06, "loss": 0.7515, "step": 6506 }, { "epoch": 0.94, "grad_norm": 5.429782390594482, "learning_rate": 1.6224816032195224e-06, "loss": 0.6286, "step": 6507 }, { "epoch": 0.94, "grad_norm": 6.076323509216309, "learning_rate": 1.6223577180251368e-06, "loss": 0.7387, "step": 6508 }, { "epoch": 0.94, "grad_norm": 5.045918941497803, "learning_rate": 1.6222338172385902e-06, "loss": 0.7218, "step": 6509 }, { "epoch": 0.94, "grad_norm": 5.536443710327148, "learning_rate": 1.622109900862986e-06, "loss": 0.7944, "step": 6510 }, { "epoch": 0.94, "grad_norm": 5.744329929351807, "learning_rate": 1.6219859689014292e-06, "loss": 0.6837, "step": 6511 }, { "epoch": 0.94, "grad_norm": 5.280383110046387, "learning_rate": 1.6218620213570247e-06, "loss": 0.6539, "step": 6512 }, { "epoch": 0.95, "grad_norm": 5.627529144287109, "learning_rate": 1.6217380582328772e-06, "loss": 0.7012, "step": 6513 }, { "epoch": 0.95, "grad_norm": 5.561942100524902, "learning_rate": 1.6216140795320934e-06, "loss": 0.707, "step": 6514 }, { "epoch": 0.95, "grad_norm": 4.949697494506836, "learning_rate": 1.6214900852577786e-06, "loss": 0.6383, "step": 6515 }, { "epoch": 0.95, "grad_norm": 5.715319633483887, "learning_rate": 1.6213660754130397e-06, "loss": 0.728, "step": 6516 }, { "epoch": 0.95, "grad_norm": 5.817513942718506, "learning_rate": 1.621242050000983e-06, "loss": 0.7004, "step": 6517 }, { "epoch": 0.95, "grad_norm": 5.945786476135254, "learning_rate": 1.6211180090247165e-06, "loss": 0.7711, "step": 6518 }, { "epoch": 0.95, "grad_norm": 5.546589374542236, "learning_rate": 1.6209939524873475e-06, "loss": 0.7143, "step": 6519 }, { "epoch": 0.95, "grad_norm": 5.208666801452637, "learning_rate": 1.620869880391984e-06, "loss": 0.6835, "step": 6520 }, { "epoch": 0.95, "grad_norm": 5.42313289642334, "learning_rate": 1.6207457927417344e-06, "loss": 0.7545, "step": 6521 }, { "epoch": 0.95, "grad_norm": 5.730363845825195, "learning_rate": 1.6206216895397078e-06, "loss": 0.7047, "step": 6522 }, { "epoch": 0.95, "grad_norm": 5.711402416229248, "learning_rate": 1.6204975707890127e-06, "loss": 0.6169, "step": 6523 }, { "epoch": 0.95, "grad_norm": 5.22484016418457, "learning_rate": 1.6203734364927595e-06, "loss": 0.6672, "step": 6524 }, { "epoch": 0.95, "grad_norm": 5.472838401794434, "learning_rate": 1.6202492866540575e-06, "loss": 0.6672, "step": 6525 }, { "epoch": 0.95, "grad_norm": 5.395598411560059, "learning_rate": 1.6201251212760179e-06, "loss": 0.6748, "step": 6526 }, { "epoch": 0.95, "grad_norm": 5.318858623504639, "learning_rate": 1.6200009403617508e-06, "loss": 0.6312, "step": 6527 }, { "epoch": 0.95, "grad_norm": 6.190143585205078, "learning_rate": 1.6198767439143674e-06, "loss": 0.7848, "step": 6528 }, { "epoch": 0.95, "grad_norm": 5.369027137756348, "learning_rate": 1.6197525319369798e-06, "loss": 0.7411, "step": 6529 }, { "epoch": 0.95, "grad_norm": 6.875641345977783, "learning_rate": 1.619628304432699e-06, "loss": 0.8015, "step": 6530 }, { "epoch": 0.95, "grad_norm": 5.646941184997559, "learning_rate": 1.619504061404638e-06, "loss": 0.7292, "step": 6531 }, { "epoch": 0.95, "grad_norm": 5.69089412689209, "learning_rate": 1.6193798028559093e-06, "loss": 0.6361, "step": 6532 }, { "epoch": 0.95, "grad_norm": 5.5432000160217285, "learning_rate": 1.619255528789626e-06, "loss": 0.5669, "step": 6533 }, { "epoch": 0.95, "grad_norm": 5.79276704788208, "learning_rate": 1.6191312392089018e-06, "loss": 0.711, "step": 6534 }, { "epoch": 0.95, "grad_norm": 6.257812023162842, "learning_rate": 1.6190069341168501e-06, "loss": 0.6948, "step": 6535 }, { "epoch": 0.95, "grad_norm": 6.225227355957031, "learning_rate": 1.6188826135165857e-06, "loss": 0.7918, "step": 6536 }, { "epoch": 0.95, "grad_norm": 5.997756004333496, "learning_rate": 1.6187582774112229e-06, "loss": 0.745, "step": 6537 }, { "epoch": 0.95, "grad_norm": 5.403318881988525, "learning_rate": 1.6186339258038766e-06, "loss": 0.7065, "step": 6538 }, { "epoch": 0.95, "grad_norm": 5.000885963439941, "learning_rate": 1.618509558697663e-06, "loss": 0.6184, "step": 6539 }, { "epoch": 0.95, "grad_norm": 4.973949432373047, "learning_rate": 1.618385176095697e-06, "loss": 0.6817, "step": 6540 }, { "epoch": 0.95, "grad_norm": 5.765843391418457, "learning_rate": 1.6182607780010953e-06, "loss": 0.6994, "step": 6541 }, { "epoch": 0.95, "grad_norm": 5.512903690338135, "learning_rate": 1.6181363644169744e-06, "loss": 0.7467, "step": 6542 }, { "epoch": 0.95, "grad_norm": 5.264389991760254, "learning_rate": 1.6180119353464509e-06, "loss": 0.6622, "step": 6543 }, { "epoch": 0.95, "grad_norm": 5.6444220542907715, "learning_rate": 1.617887490792643e-06, "loss": 0.7126, "step": 6544 }, { "epoch": 0.95, "grad_norm": 5.757080554962158, "learning_rate": 1.6177630307586679e-06, "loss": 0.6956, "step": 6545 }, { "epoch": 0.95, "grad_norm": 5.659553527832031, "learning_rate": 1.6176385552476436e-06, "loss": 0.6882, "step": 6546 }, { "epoch": 0.95, "grad_norm": 5.696178436279297, "learning_rate": 1.617514064262689e-06, "loss": 0.7644, "step": 6547 }, { "epoch": 0.95, "grad_norm": 5.180817127227783, "learning_rate": 1.617389557806923e-06, "loss": 0.7404, "step": 6548 }, { "epoch": 0.95, "grad_norm": 6.031747341156006, "learning_rate": 1.6172650358834645e-06, "loss": 0.721, "step": 6549 }, { "epoch": 0.95, "grad_norm": 5.554394245147705, "learning_rate": 1.6171404984954337e-06, "loss": 0.6762, "step": 6550 }, { "epoch": 0.95, "grad_norm": 5.671485424041748, "learning_rate": 1.6170159456459501e-06, "loss": 0.7378, "step": 6551 }, { "epoch": 0.95, "grad_norm": 5.642347812652588, "learning_rate": 1.616891377338135e-06, "loss": 0.6686, "step": 6552 }, { "epoch": 0.95, "grad_norm": 6.071635723114014, "learning_rate": 1.6167667935751085e-06, "loss": 0.6277, "step": 6553 }, { "epoch": 0.95, "grad_norm": 5.241722106933594, "learning_rate": 1.6166421943599923e-06, "loss": 0.6671, "step": 6554 }, { "epoch": 0.95, "grad_norm": 5.185245513916016, "learning_rate": 1.6165175796959077e-06, "loss": 0.667, "step": 6555 }, { "epoch": 0.95, "grad_norm": 6.029524803161621, "learning_rate": 1.616392949585977e-06, "loss": 0.6959, "step": 6556 }, { "epoch": 0.95, "grad_norm": 5.781485080718994, "learning_rate": 1.6162683040333224e-06, "loss": 0.8021, "step": 6557 }, { "epoch": 0.95, "grad_norm": 4.714538097381592, "learning_rate": 1.6161436430410668e-06, "loss": 0.633, "step": 6558 }, { "epoch": 0.95, "grad_norm": 5.12293815612793, "learning_rate": 1.6160189666123333e-06, "loss": 0.6412, "step": 6559 }, { "epoch": 0.95, "grad_norm": 5.160076141357422, "learning_rate": 1.6158942747502455e-06, "loss": 0.7136, "step": 6560 }, { "epoch": 0.95, "grad_norm": 5.815604209899902, "learning_rate": 1.6157695674579275e-06, "loss": 0.6854, "step": 6561 }, { "epoch": 0.95, "grad_norm": 6.379368305206299, "learning_rate": 1.6156448447385033e-06, "loss": 0.8334, "step": 6562 }, { "epoch": 0.95, "grad_norm": 5.123784065246582, "learning_rate": 1.6155201065950982e-06, "loss": 0.7598, "step": 6563 }, { "epoch": 0.95, "grad_norm": 5.461400985717773, "learning_rate": 1.6153953530308367e-06, "loss": 0.7162, "step": 6564 }, { "epoch": 0.95, "grad_norm": 5.633934497833252, "learning_rate": 1.6152705840488446e-06, "loss": 0.716, "step": 6565 }, { "epoch": 0.95, "grad_norm": 6.292341709136963, "learning_rate": 1.6151457996522476e-06, "loss": 0.6262, "step": 6566 }, { "epoch": 0.95, "grad_norm": 5.5758056640625, "learning_rate": 1.6150209998441721e-06, "loss": 0.7496, "step": 6567 }, { "epoch": 0.95, "grad_norm": 6.082156181335449, "learning_rate": 1.614896184627745e-06, "loss": 0.6913, "step": 6568 }, { "epoch": 0.95, "grad_norm": 4.966793060302734, "learning_rate": 1.6147713540060927e-06, "loss": 0.6799, "step": 6569 }, { "epoch": 0.95, "grad_norm": 5.491507530212402, "learning_rate": 1.6146465079823433e-06, "loss": 0.6907, "step": 6570 }, { "epoch": 0.95, "grad_norm": 6.146963119506836, "learning_rate": 1.6145216465596242e-06, "loss": 0.6491, "step": 6571 }, { "epoch": 0.95, "grad_norm": 5.534919738769531, "learning_rate": 1.6143967697410638e-06, "loss": 0.6521, "step": 6572 }, { "epoch": 0.95, "grad_norm": 5.676693916320801, "learning_rate": 1.614271877529791e-06, "loss": 0.7983, "step": 6573 }, { "epoch": 0.95, "grad_norm": 5.926886558532715, "learning_rate": 1.6141469699289338e-06, "loss": 0.7599, "step": 6574 }, { "epoch": 0.95, "grad_norm": 5.107292175292969, "learning_rate": 1.6140220469416227e-06, "loss": 0.644, "step": 6575 }, { "epoch": 0.95, "grad_norm": 5.5680928230285645, "learning_rate": 1.6138971085709864e-06, "loss": 0.7426, "step": 6576 }, { "epoch": 0.95, "grad_norm": 5.493329048156738, "learning_rate": 1.6137721548201556e-06, "loss": 0.7049, "step": 6577 }, { "epoch": 0.95, "grad_norm": 6.47682523727417, "learning_rate": 1.6136471856922608e-06, "loss": 0.6929, "step": 6578 }, { "epoch": 0.95, "grad_norm": 5.449382781982422, "learning_rate": 1.6135222011904332e-06, "loss": 0.684, "step": 6579 }, { "epoch": 0.95, "grad_norm": 5.166391849517822, "learning_rate": 1.6133972013178032e-06, "loss": 0.6466, "step": 6580 }, { "epoch": 0.95, "grad_norm": 4.929218292236328, "learning_rate": 1.6132721860775033e-06, "loss": 0.6882, "step": 6581 }, { "epoch": 0.96, "grad_norm": 5.658483505249023, "learning_rate": 1.6131471554726653e-06, "loss": 0.6742, "step": 6582 }, { "epoch": 0.96, "grad_norm": 5.229278087615967, "learning_rate": 1.6130221095064212e-06, "loss": 0.7519, "step": 6583 }, { "epoch": 0.96, "grad_norm": 6.268256187438965, "learning_rate": 1.6128970481819045e-06, "loss": 0.7395, "step": 6584 }, { "epoch": 0.96, "grad_norm": 5.879909992218018, "learning_rate": 1.612771971502248e-06, "loss": 0.7087, "step": 6585 }, { "epoch": 0.96, "grad_norm": 5.2605462074279785, "learning_rate": 1.6126468794705854e-06, "loss": 0.6031, "step": 6586 }, { "epoch": 0.96, "grad_norm": 6.031832695007324, "learning_rate": 1.6125217720900506e-06, "loss": 0.8089, "step": 6587 }, { "epoch": 0.96, "grad_norm": 6.295870304107666, "learning_rate": 1.6123966493637782e-06, "loss": 0.7094, "step": 6588 }, { "epoch": 0.96, "grad_norm": 4.938214302062988, "learning_rate": 1.6122715112949029e-06, "loss": 0.6386, "step": 6589 }, { "epoch": 0.96, "grad_norm": 5.9484381675720215, "learning_rate": 1.6121463578865594e-06, "loss": 0.6723, "step": 6590 }, { "epoch": 0.96, "grad_norm": 5.912592887878418, "learning_rate": 1.6120211891418838e-06, "loss": 0.7531, "step": 6591 }, { "epoch": 0.96, "grad_norm": 5.551492214202881, "learning_rate": 1.6118960050640115e-06, "loss": 0.6846, "step": 6592 }, { "epoch": 0.96, "grad_norm": 4.8915276527404785, "learning_rate": 1.6117708056560792e-06, "loss": 0.6523, "step": 6593 }, { "epoch": 0.96, "grad_norm": 6.705748558044434, "learning_rate": 1.6116455909212232e-06, "loss": 0.7644, "step": 6594 }, { "epoch": 0.96, "grad_norm": 5.850874900817871, "learning_rate": 1.6115203608625812e-06, "loss": 0.8354, "step": 6595 }, { "epoch": 0.96, "grad_norm": 5.314749717712402, "learning_rate": 1.6113951154832898e-06, "loss": 0.6497, "step": 6596 }, { "epoch": 0.96, "grad_norm": 5.831190586090088, "learning_rate": 1.6112698547864873e-06, "loss": 0.7712, "step": 6597 }, { "epoch": 0.96, "grad_norm": 5.459140777587891, "learning_rate": 1.6111445787753118e-06, "loss": 0.7559, "step": 6598 }, { "epoch": 0.96, "grad_norm": 5.3029398918151855, "learning_rate": 1.6110192874529016e-06, "loss": 0.6772, "step": 6599 }, { "epoch": 0.96, "grad_norm": 6.229211807250977, "learning_rate": 1.6108939808223964e-06, "loss": 0.7573, "step": 6600 }, { "epoch": 0.96, "grad_norm": 5.189240455627441, "learning_rate": 1.6107686588869349e-06, "loss": 0.7414, "step": 6601 }, { "epoch": 0.96, "grad_norm": 5.296002388000488, "learning_rate": 1.6106433216496571e-06, "loss": 0.6532, "step": 6602 }, { "epoch": 0.96, "grad_norm": 5.79262113571167, "learning_rate": 1.6105179691137027e-06, "loss": 0.6938, "step": 6603 }, { "epoch": 0.96, "grad_norm": 5.4530510902404785, "learning_rate": 1.6103926012822132e-06, "loss": 0.6164, "step": 6604 }, { "epoch": 0.96, "grad_norm": 5.384315490722656, "learning_rate": 1.6102672181583286e-06, "loss": 0.7154, "step": 6605 }, { "epoch": 0.96, "grad_norm": 5.249422550201416, "learning_rate": 1.6101418197451904e-06, "loss": 0.7069, "step": 6606 }, { "epoch": 0.96, "grad_norm": 5.687981605529785, "learning_rate": 1.61001640604594e-06, "loss": 0.6449, "step": 6607 }, { "epoch": 0.96, "grad_norm": 5.203097343444824, "learning_rate": 1.6098909770637202e-06, "loss": 0.6529, "step": 6608 }, { "epoch": 0.96, "grad_norm": 5.037050247192383, "learning_rate": 1.6097655328016726e-06, "loss": 0.6781, "step": 6609 }, { "epoch": 0.96, "grad_norm": 5.6056413650512695, "learning_rate": 1.6096400732629404e-06, "loss": 0.7679, "step": 6610 }, { "epoch": 0.96, "grad_norm": 5.353403091430664, "learning_rate": 1.6095145984506669e-06, "loss": 0.6467, "step": 6611 }, { "epoch": 0.96, "grad_norm": 5.26096773147583, "learning_rate": 1.609389108367995e-06, "loss": 0.6286, "step": 6612 }, { "epoch": 0.96, "grad_norm": 5.926838397979736, "learning_rate": 1.6092636030180695e-06, "loss": 0.6721, "step": 6613 }, { "epoch": 0.96, "grad_norm": 4.965793609619141, "learning_rate": 1.6091380824040343e-06, "loss": 0.66, "step": 6614 }, { "epoch": 0.96, "grad_norm": 5.5132737159729, "learning_rate": 1.6090125465290347e-06, "loss": 0.7048, "step": 6615 }, { "epoch": 0.96, "grad_norm": 5.068296909332275, "learning_rate": 1.6088869953962148e-06, "loss": 0.6353, "step": 6616 }, { "epoch": 0.96, "grad_norm": 5.956631660461426, "learning_rate": 1.6087614290087205e-06, "loss": 0.6769, "step": 6617 }, { "epoch": 0.96, "grad_norm": 5.923066139221191, "learning_rate": 1.608635847369698e-06, "loss": 0.7097, "step": 6618 }, { "epoch": 0.96, "grad_norm": 5.295319080352783, "learning_rate": 1.608510250482293e-06, "loss": 0.6762, "step": 6619 }, { "epoch": 0.96, "grad_norm": 6.158694744110107, "learning_rate": 1.6083846383496528e-06, "loss": 0.6759, "step": 6620 }, { "epoch": 0.96, "grad_norm": 6.031515121459961, "learning_rate": 1.6082590109749234e-06, "loss": 0.6702, "step": 6621 }, { "epoch": 0.96, "grad_norm": 5.628087043762207, "learning_rate": 1.6081333683612532e-06, "loss": 0.6282, "step": 6622 }, { "epoch": 0.96, "grad_norm": 5.406332015991211, "learning_rate": 1.6080077105117895e-06, "loss": 0.6231, "step": 6623 }, { "epoch": 0.96, "grad_norm": 5.892935276031494, "learning_rate": 1.6078820374296806e-06, "loss": 0.7166, "step": 6624 }, { "epoch": 0.96, "grad_norm": 5.399532318115234, "learning_rate": 1.607756349118075e-06, "loss": 0.6424, "step": 6625 }, { "epoch": 0.96, "grad_norm": 5.646545886993408, "learning_rate": 1.6076306455801214e-06, "loss": 0.6999, "step": 6626 }, { "epoch": 0.96, "grad_norm": 5.563548564910889, "learning_rate": 1.6075049268189694e-06, "loss": 0.636, "step": 6627 }, { "epoch": 0.96, "grad_norm": 4.936847686767578, "learning_rate": 1.6073791928377685e-06, "loss": 0.6836, "step": 6628 }, { "epoch": 0.96, "grad_norm": 5.360935688018799, "learning_rate": 1.6072534436396687e-06, "loss": 0.6945, "step": 6629 }, { "epoch": 0.96, "grad_norm": 5.534460544586182, "learning_rate": 1.6071276792278207e-06, "loss": 0.7292, "step": 6630 }, { "epoch": 0.96, "grad_norm": 5.232723236083984, "learning_rate": 1.6070018996053751e-06, "loss": 0.6907, "step": 6631 }, { "epoch": 0.96, "grad_norm": 5.498692989349365, "learning_rate": 1.6068761047754832e-06, "loss": 0.7236, "step": 6632 }, { "epoch": 0.96, "grad_norm": 5.388874530792236, "learning_rate": 1.6067502947412965e-06, "loss": 0.6813, "step": 6633 }, { "epoch": 0.96, "grad_norm": 5.908340930938721, "learning_rate": 1.6066244695059672e-06, "loss": 0.7921, "step": 6634 }, { "epoch": 0.96, "grad_norm": 5.283679008483887, "learning_rate": 1.6064986290726475e-06, "loss": 0.7264, "step": 6635 }, { "epoch": 0.96, "grad_norm": 5.838009357452393, "learning_rate": 1.6063727734444899e-06, "loss": 0.6974, "step": 6636 }, { "epoch": 0.96, "grad_norm": 6.574392318725586, "learning_rate": 1.606246902624648e-06, "loss": 0.6996, "step": 6637 }, { "epoch": 0.96, "grad_norm": 6.124566078186035, "learning_rate": 1.6061210166162747e-06, "loss": 0.691, "step": 6638 }, { "epoch": 0.96, "grad_norm": 5.2405900955200195, "learning_rate": 1.6059951154225239e-06, "loss": 0.6419, "step": 6639 }, { "epoch": 0.96, "grad_norm": 5.185355186462402, "learning_rate": 1.6058691990465505e-06, "loss": 0.6187, "step": 6640 }, { "epoch": 0.96, "grad_norm": 6.010666370391846, "learning_rate": 1.6057432674915087e-06, "loss": 0.7655, "step": 6641 }, { "epoch": 0.96, "grad_norm": 5.957165241241455, "learning_rate": 1.6056173207605537e-06, "loss": 0.6924, "step": 6642 }, { "epoch": 0.96, "grad_norm": 5.397817611694336, "learning_rate": 1.6054913588568409e-06, "loss": 0.7519, "step": 6643 }, { "epoch": 0.96, "grad_norm": 5.61517333984375, "learning_rate": 1.6053653817835256e-06, "loss": 0.6901, "step": 6644 }, { "epoch": 0.96, "grad_norm": 5.98054838180542, "learning_rate": 1.6052393895437644e-06, "loss": 0.6696, "step": 6645 }, { "epoch": 0.96, "grad_norm": 5.34749698638916, "learning_rate": 1.6051133821407135e-06, "loss": 0.7508, "step": 6646 }, { "epoch": 0.96, "grad_norm": 5.507312774658203, "learning_rate": 1.6049873595775302e-06, "loss": 0.6933, "step": 6647 }, { "epoch": 0.96, "grad_norm": 6.637567520141602, "learning_rate": 1.6048613218573715e-06, "loss": 0.8115, "step": 6648 }, { "epoch": 0.96, "grad_norm": 5.574792385101318, "learning_rate": 1.6047352689833952e-06, "loss": 0.8401, "step": 6649 }, { "epoch": 0.96, "grad_norm": 5.8378729820251465, "learning_rate": 1.6046092009587597e-06, "loss": 0.7425, "step": 6650 }, { "epoch": 0.97, "grad_norm": 5.663810729980469, "learning_rate": 1.6044831177866227e-06, "loss": 0.7169, "step": 6651 }, { "epoch": 0.97, "grad_norm": 5.780467987060547, "learning_rate": 1.6043570194701434e-06, "loss": 0.7173, "step": 6652 }, { "epoch": 0.97, "grad_norm": 5.351181983947754, "learning_rate": 1.6042309060124807e-06, "loss": 0.7189, "step": 6653 }, { "epoch": 0.97, "grad_norm": 5.7299299240112305, "learning_rate": 1.604104777416795e-06, "loss": 0.6948, "step": 6654 }, { "epoch": 0.97, "grad_norm": 5.811917781829834, "learning_rate": 1.6039786336862452e-06, "loss": 0.7651, "step": 6655 }, { "epoch": 0.97, "grad_norm": 5.429478645324707, "learning_rate": 1.6038524748239924e-06, "loss": 0.7312, "step": 6656 }, { "epoch": 0.97, "grad_norm": 6.150044918060303, "learning_rate": 1.6037263008331968e-06, "loss": 0.6706, "step": 6657 }, { "epoch": 0.97, "grad_norm": 6.020445823669434, "learning_rate": 1.6036001117170196e-06, "loss": 0.7266, "step": 6658 }, { "epoch": 0.97, "grad_norm": 5.4138383865356445, "learning_rate": 1.6034739074786226e-06, "loss": 0.6246, "step": 6659 }, { "epoch": 0.97, "grad_norm": 4.892763137817383, "learning_rate": 1.6033476881211674e-06, "loss": 0.733, "step": 6660 }, { "epoch": 0.97, "grad_norm": 4.914052486419678, "learning_rate": 1.603221453647816e-06, "loss": 0.6897, "step": 6661 }, { "epoch": 0.97, "grad_norm": 5.4430317878723145, "learning_rate": 1.6030952040617313e-06, "loss": 0.7208, "step": 6662 }, { "epoch": 0.97, "grad_norm": 5.32147741317749, "learning_rate": 1.6029689393660762e-06, "loss": 0.7103, "step": 6663 }, { "epoch": 0.97, "grad_norm": 5.5095930099487305, "learning_rate": 1.6028426595640138e-06, "loss": 0.6743, "step": 6664 }, { "epoch": 0.97, "grad_norm": 5.0023322105407715, "learning_rate": 1.6027163646587085e-06, "loss": 0.6604, "step": 6665 }, { "epoch": 0.97, "grad_norm": 5.472001552581787, "learning_rate": 1.6025900546533238e-06, "loss": 0.6689, "step": 6666 }, { "epoch": 0.97, "grad_norm": 5.888176441192627, "learning_rate": 1.602463729551024e-06, "loss": 0.639, "step": 6667 }, { "epoch": 0.97, "grad_norm": 5.178499698638916, "learning_rate": 1.6023373893549753e-06, "loss": 0.7124, "step": 6668 }, { "epoch": 0.97, "grad_norm": 6.173895835876465, "learning_rate": 1.602211034068341e-06, "loss": 0.7296, "step": 6669 }, { "epoch": 0.97, "grad_norm": 5.5264387130737305, "learning_rate": 1.6020846636942884e-06, "loss": 0.7233, "step": 6670 }, { "epoch": 0.97, "grad_norm": 5.7934041023254395, "learning_rate": 1.6019582782359827e-06, "loss": 0.7732, "step": 6671 }, { "epoch": 0.97, "grad_norm": 5.938774108886719, "learning_rate": 1.6018318776965901e-06, "loss": 0.7619, "step": 6672 }, { "epoch": 0.97, "grad_norm": 5.346470355987549, "learning_rate": 1.601705462079278e-06, "loss": 0.749, "step": 6673 }, { "epoch": 0.97, "grad_norm": 6.111217498779297, "learning_rate": 1.6015790313872134e-06, "loss": 0.766, "step": 6674 }, { "epoch": 0.97, "grad_norm": 5.222410678863525, "learning_rate": 1.6014525856235633e-06, "loss": 0.7235, "step": 6675 }, { "epoch": 0.97, "grad_norm": 6.230980396270752, "learning_rate": 1.6013261247914959e-06, "loss": 0.8143, "step": 6676 }, { "epoch": 0.97, "grad_norm": 5.29287052154541, "learning_rate": 1.6011996488941794e-06, "loss": 0.6388, "step": 6677 }, { "epoch": 0.97, "grad_norm": 5.144959926605225, "learning_rate": 1.601073157934783e-06, "loss": 0.6384, "step": 6678 }, { "epoch": 0.97, "grad_norm": 5.632802963256836, "learning_rate": 1.600946651916475e-06, "loss": 0.7276, "step": 6679 }, { "epoch": 0.97, "grad_norm": 6.301267623901367, "learning_rate": 1.600820130842425e-06, "loss": 0.7375, "step": 6680 }, { "epoch": 0.97, "grad_norm": 5.2331767082214355, "learning_rate": 1.6006935947158028e-06, "loss": 0.6818, "step": 6681 }, { "epoch": 0.97, "grad_norm": 5.33162260055542, "learning_rate": 1.600567043539779e-06, "loss": 0.702, "step": 6682 }, { "epoch": 0.97, "grad_norm": 6.2495951652526855, "learning_rate": 1.6004404773175234e-06, "loss": 0.7248, "step": 6683 }, { "epoch": 0.97, "grad_norm": 5.045623302459717, "learning_rate": 1.6003138960522073e-06, "loss": 0.646, "step": 6684 }, { "epoch": 0.97, "grad_norm": 5.905324935913086, "learning_rate": 1.6001872997470019e-06, "loss": 0.7637, "step": 6685 }, { "epoch": 0.97, "grad_norm": 5.2653985023498535, "learning_rate": 1.6000606884050792e-06, "loss": 0.6906, "step": 6686 }, { "epoch": 0.97, "grad_norm": 4.836607933044434, "learning_rate": 1.5999340620296104e-06, "loss": 0.6767, "step": 6687 }, { "epoch": 0.97, "grad_norm": 6.07827091217041, "learning_rate": 1.5998074206237688e-06, "loss": 0.6878, "step": 6688 }, { "epoch": 0.97, "grad_norm": 5.38087272644043, "learning_rate": 1.599680764190727e-06, "loss": 0.7648, "step": 6689 }, { "epoch": 0.97, "grad_norm": 4.964794158935547, "learning_rate": 1.599554092733658e-06, "loss": 0.6361, "step": 6690 }, { "epoch": 0.97, "grad_norm": 5.560837268829346, "learning_rate": 1.5994274062557351e-06, "loss": 0.6172, "step": 6691 }, { "epoch": 0.97, "grad_norm": 5.600799560546875, "learning_rate": 1.5993007047601325e-06, "loss": 0.6296, "step": 6692 }, { "epoch": 0.97, "grad_norm": 5.598804473876953, "learning_rate": 1.5991739882500248e-06, "loss": 0.7174, "step": 6693 }, { "epoch": 0.97, "grad_norm": 7.0182204246521, "learning_rate": 1.5990472567285862e-06, "loss": 0.7998, "step": 6694 }, { "epoch": 0.97, "grad_norm": 5.898038387298584, "learning_rate": 1.5989205101989919e-06, "loss": 0.6671, "step": 6695 }, { "epoch": 0.97, "grad_norm": 5.643287181854248, "learning_rate": 1.5987937486644175e-06, "loss": 0.7027, "step": 6696 }, { "epoch": 0.97, "grad_norm": 5.700009346008301, "learning_rate": 1.5986669721280386e-06, "loss": 0.7773, "step": 6697 }, { "epoch": 0.97, "grad_norm": 6.4133405685424805, "learning_rate": 1.5985401805930314e-06, "loss": 0.7396, "step": 6698 }, { "epoch": 0.97, "grad_norm": 5.590548992156982, "learning_rate": 1.5984133740625727e-06, "loss": 0.7547, "step": 6699 }, { "epoch": 0.97, "grad_norm": 5.811938285827637, "learning_rate": 1.598286552539839e-06, "loss": 0.7383, "step": 6700 }, { "epoch": 0.97, "grad_norm": 5.213582992553711, "learning_rate": 1.598159716028008e-06, "loss": 0.6355, "step": 6701 }, { "epoch": 0.97, "grad_norm": 5.436492443084717, "learning_rate": 1.598032864530257e-06, "loss": 0.7029, "step": 6702 }, { "epoch": 0.97, "grad_norm": 5.079110145568848, "learning_rate": 1.5979059980497646e-06, "loss": 0.626, "step": 6703 }, { "epoch": 0.97, "grad_norm": 4.640590190887451, "learning_rate": 1.5977791165897088e-06, "loss": 0.5644, "step": 6704 }, { "epoch": 0.97, "grad_norm": 5.796590805053711, "learning_rate": 1.5976522201532686e-06, "loss": 0.7294, "step": 6705 }, { "epoch": 0.97, "grad_norm": 8.228394508361816, "learning_rate": 1.5975253087436228e-06, "loss": 0.8915, "step": 6706 }, { "epoch": 0.97, "grad_norm": 4.956115245819092, "learning_rate": 1.5973983823639514e-06, "loss": 0.6584, "step": 6707 }, { "epoch": 0.97, "grad_norm": 6.260542392730713, "learning_rate": 1.5972714410174343e-06, "loss": 0.7752, "step": 6708 }, { "epoch": 0.97, "grad_norm": 5.999144077301025, "learning_rate": 1.5971444847072517e-06, "loss": 0.6685, "step": 6709 }, { "epoch": 0.97, "grad_norm": 5.302271842956543, "learning_rate": 1.597017513436584e-06, "loss": 0.6837, "step": 6710 }, { "epoch": 0.97, "grad_norm": 5.584690570831299, "learning_rate": 1.5968905272086127e-06, "loss": 0.6378, "step": 6711 }, { "epoch": 0.97, "grad_norm": 5.893160820007324, "learning_rate": 1.5967635260265193e-06, "loss": 0.7817, "step": 6712 }, { "epoch": 0.97, "grad_norm": 5.971678733825684, "learning_rate": 1.5966365098934854e-06, "loss": 0.6236, "step": 6713 }, { "epoch": 0.97, "grad_norm": 5.9577250480651855, "learning_rate": 1.5965094788126931e-06, "loss": 0.7277, "step": 6714 }, { "epoch": 0.97, "grad_norm": 5.773124694824219, "learning_rate": 1.596382432787325e-06, "loss": 0.6713, "step": 6715 }, { "epoch": 0.97, "grad_norm": 5.485759258270264, "learning_rate": 1.596255371820564e-06, "loss": 0.6637, "step": 6716 }, { "epoch": 0.97, "grad_norm": 5.8578104972839355, "learning_rate": 1.5961282959155936e-06, "loss": 0.6579, "step": 6717 }, { "epoch": 0.97, "grad_norm": 5.629414081573486, "learning_rate": 1.5960012050755971e-06, "loss": 0.6599, "step": 6718 }, { "epoch": 0.97, "grad_norm": 5.736294746398926, "learning_rate": 1.5958740993037591e-06, "loss": 0.7389, "step": 6719 }, { "epoch": 0.98, "grad_norm": 5.478188514709473, "learning_rate": 1.5957469786032636e-06, "loss": 0.6613, "step": 6720 }, { "epoch": 0.98, "grad_norm": 6.114403247833252, "learning_rate": 1.5956198429772956e-06, "loss": 0.7983, "step": 6721 }, { "epoch": 0.98, "grad_norm": 5.1905975341796875, "learning_rate": 1.59549269242904e-06, "loss": 0.7346, "step": 6722 }, { "epoch": 0.98, "grad_norm": 5.115968704223633, "learning_rate": 1.5953655269616828e-06, "loss": 0.7132, "step": 6723 }, { "epoch": 0.98, "grad_norm": 5.479288101196289, "learning_rate": 1.5952383465784095e-06, "loss": 0.6837, "step": 6724 }, { "epoch": 0.98, "grad_norm": 5.126303195953369, "learning_rate": 1.5951111512824066e-06, "loss": 0.7028, "step": 6725 }, { "epoch": 0.98, "grad_norm": 5.344381809234619, "learning_rate": 1.594983941076861e-06, "loss": 0.6497, "step": 6726 }, { "epoch": 0.98, "grad_norm": 4.959508895874023, "learning_rate": 1.5948567159649596e-06, "loss": 0.7017, "step": 6727 }, { "epoch": 0.98, "grad_norm": 5.3976922035217285, "learning_rate": 1.5947294759498893e-06, "loss": 0.6991, "step": 6728 }, { "epoch": 0.98, "grad_norm": 6.106117248535156, "learning_rate": 1.5946022210348385e-06, "loss": 0.6814, "step": 6729 }, { "epoch": 0.98, "grad_norm": 6.101953029632568, "learning_rate": 1.5944749512229954e-06, "loss": 0.6511, "step": 6730 }, { "epoch": 0.98, "grad_norm": 6.473238468170166, "learning_rate": 1.5943476665175482e-06, "loss": 0.7948, "step": 6731 }, { "epoch": 0.98, "grad_norm": 5.313808917999268, "learning_rate": 1.5942203669216857e-06, "loss": 0.6724, "step": 6732 }, { "epoch": 0.98, "grad_norm": 5.410933494567871, "learning_rate": 1.5940930524385974e-06, "loss": 0.6516, "step": 6733 }, { "epoch": 0.98, "grad_norm": 5.133090972900391, "learning_rate": 1.5939657230714732e-06, "loss": 0.6879, "step": 6734 }, { "epoch": 0.98, "grad_norm": 4.985437870025635, "learning_rate": 1.593838378823503e-06, "loss": 0.6447, "step": 6735 }, { "epoch": 0.98, "grad_norm": 5.133107662200928, "learning_rate": 1.593711019697877e-06, "loss": 0.7079, "step": 6736 }, { "epoch": 0.98, "grad_norm": 5.5207600593566895, "learning_rate": 1.593583645697786e-06, "loss": 0.6793, "step": 6737 }, { "epoch": 0.98, "grad_norm": 6.389774322509766, "learning_rate": 1.5934562568264212e-06, "loss": 0.6329, "step": 6738 }, { "epoch": 0.98, "grad_norm": 6.199825286865234, "learning_rate": 1.593328853086974e-06, "loss": 0.7064, "step": 6739 }, { "epoch": 0.98, "grad_norm": 5.944381237030029, "learning_rate": 1.5932014344826366e-06, "loss": 0.7327, "step": 6740 }, { "epoch": 0.98, "grad_norm": 5.000296115875244, "learning_rate": 1.5930740010166013e-06, "loss": 0.6122, "step": 6741 }, { "epoch": 0.98, "grad_norm": 5.3697123527526855, "learning_rate": 1.5929465526920606e-06, "loss": 0.6853, "step": 6742 }, { "epoch": 0.98, "grad_norm": 5.631184101104736, "learning_rate": 1.5928190895122069e-06, "loss": 0.7268, "step": 6743 }, { "epoch": 0.98, "grad_norm": 6.469388008117676, "learning_rate": 1.5926916114802344e-06, "loss": 0.7174, "step": 6744 }, { "epoch": 0.98, "grad_norm": 5.333827972412109, "learning_rate": 1.5925641185993369e-06, "loss": 0.6984, "step": 6745 }, { "epoch": 0.98, "grad_norm": 5.598080158233643, "learning_rate": 1.5924366108727076e-06, "loss": 0.6783, "step": 6746 }, { "epoch": 0.98, "grad_norm": 5.531167030334473, "learning_rate": 1.5923090883035424e-06, "loss": 0.7337, "step": 6747 }, { "epoch": 0.98, "grad_norm": 5.999665260314941, "learning_rate": 1.592181550895035e-06, "loss": 0.7341, "step": 6748 }, { "epoch": 0.98, "grad_norm": 5.252035617828369, "learning_rate": 1.5920539986503809e-06, "loss": 0.7228, "step": 6749 }, { "epoch": 0.98, "grad_norm": 5.523674011230469, "learning_rate": 1.591926431572776e-06, "loss": 0.596, "step": 6750 }, { "epoch": 0.98, "grad_norm": 4.6952667236328125, "learning_rate": 1.5917988496654163e-06, "loss": 0.6544, "step": 6751 }, { "epoch": 0.98, "grad_norm": 7.9018940925598145, "learning_rate": 1.5916712529314976e-06, "loss": 0.7801, "step": 6752 }, { "epoch": 0.98, "grad_norm": 4.948734760284424, "learning_rate": 1.5915436413742174e-06, "loss": 0.691, "step": 6753 }, { "epoch": 0.98, "grad_norm": 5.777400016784668, "learning_rate": 1.5914160149967723e-06, "loss": 0.7086, "step": 6754 }, { "epoch": 0.98, "grad_norm": 5.606715679168701, "learning_rate": 1.59128837380236e-06, "loss": 0.6458, "step": 6755 }, { "epoch": 0.98, "grad_norm": 5.828464031219482, "learning_rate": 1.5911607177941781e-06, "loss": 0.7331, "step": 6756 }, { "epoch": 0.98, "grad_norm": 5.82075309753418, "learning_rate": 1.5910330469754248e-06, "loss": 0.6717, "step": 6757 }, { "epoch": 0.98, "grad_norm": 5.549993991851807, "learning_rate": 1.5909053613492992e-06, "loss": 0.5826, "step": 6758 }, { "epoch": 0.98, "grad_norm": 7.377275466918945, "learning_rate": 1.5907776609189996e-06, "loss": 0.7396, "step": 6759 }, { "epoch": 0.98, "grad_norm": 5.424240589141846, "learning_rate": 1.5906499456877258e-06, "loss": 0.7031, "step": 6760 }, { "epoch": 0.98, "grad_norm": 6.247587203979492, "learning_rate": 1.590522215658677e-06, "loss": 0.8056, "step": 6761 }, { "epoch": 0.98, "grad_norm": 6.239018440246582, "learning_rate": 1.5903944708350541e-06, "loss": 0.6091, "step": 6762 }, { "epoch": 0.98, "grad_norm": 5.429444789886475, "learning_rate": 1.5902667112200567e-06, "loss": 0.6379, "step": 6763 }, { "epoch": 0.98, "grad_norm": 6.181107997894287, "learning_rate": 1.5901389368168858e-06, "loss": 0.6264, "step": 6764 }, { "epoch": 0.98, "grad_norm": 5.690399646759033, "learning_rate": 1.590011147628743e-06, "loss": 0.7122, "step": 6765 }, { "epoch": 0.98, "grad_norm": 5.4169816970825195, "learning_rate": 1.5898833436588295e-06, "loss": 0.6442, "step": 6766 }, { "epoch": 0.98, "grad_norm": 6.376036167144775, "learning_rate": 1.5897555249103472e-06, "loss": 0.7241, "step": 6767 }, { "epoch": 0.98, "grad_norm": 6.255111217498779, "learning_rate": 1.5896276913864983e-06, "loss": 0.7725, "step": 6768 }, { "epoch": 0.98, "grad_norm": 6.59190034866333, "learning_rate": 1.5894998430904861e-06, "loss": 0.6666, "step": 6769 }, { "epoch": 0.98, "grad_norm": 5.43876314163208, "learning_rate": 1.5893719800255128e-06, "loss": 0.6828, "step": 6770 }, { "epoch": 0.98, "grad_norm": 5.945074558258057, "learning_rate": 1.5892441021947823e-06, "loss": 0.7433, "step": 6771 }, { "epoch": 0.98, "grad_norm": 5.281919479370117, "learning_rate": 1.589116209601498e-06, "loss": 0.6976, "step": 6772 }, { "epoch": 0.98, "grad_norm": 5.689569473266602, "learning_rate": 1.5889883022488645e-06, "loss": 0.7075, "step": 6773 }, { "epoch": 0.98, "grad_norm": 5.817089557647705, "learning_rate": 1.5888603801400863e-06, "loss": 0.7541, "step": 6774 }, { "epoch": 0.98, "grad_norm": 5.839277267456055, "learning_rate": 1.588732443278368e-06, "loss": 0.6994, "step": 6775 }, { "epoch": 0.98, "grad_norm": 5.14393424987793, "learning_rate": 1.5886044916669148e-06, "loss": 0.7128, "step": 6776 }, { "epoch": 0.98, "grad_norm": 5.627054214477539, "learning_rate": 1.5884765253089324e-06, "loss": 0.7169, "step": 6777 }, { "epoch": 0.98, "grad_norm": 6.4605255126953125, "learning_rate": 1.5883485442076269e-06, "loss": 0.6874, "step": 6778 }, { "epoch": 0.98, "grad_norm": 5.108909606933594, "learning_rate": 1.5882205483662046e-06, "loss": 0.6974, "step": 6779 }, { "epoch": 0.98, "grad_norm": 4.733623504638672, "learning_rate": 1.588092537787872e-06, "loss": 0.6418, "step": 6780 }, { "epoch": 0.98, "grad_norm": 5.577900409698486, "learning_rate": 1.5879645124758365e-06, "loss": 0.6944, "step": 6781 }, { "epoch": 0.98, "grad_norm": 5.51566219329834, "learning_rate": 1.5878364724333054e-06, "loss": 0.69, "step": 6782 }, { "epoch": 0.98, "grad_norm": 5.041704177856445, "learning_rate": 1.587708417663487e-06, "loss": 0.6423, "step": 6783 }, { "epoch": 0.98, "grad_norm": 5.430652618408203, "learning_rate": 1.587580348169589e-06, "loss": 0.7346, "step": 6784 }, { "epoch": 0.98, "grad_norm": 5.267078399658203, "learning_rate": 1.58745226395482e-06, "loss": 0.6998, "step": 6785 }, { "epoch": 0.98, "grad_norm": 5.231916904449463, "learning_rate": 1.5873241650223891e-06, "loss": 0.7667, "step": 6786 }, { "epoch": 0.98, "grad_norm": 5.596725940704346, "learning_rate": 1.5871960513755056e-06, "loss": 0.7267, "step": 6787 }, { "epoch": 0.98, "grad_norm": 5.206923484802246, "learning_rate": 1.5870679230173786e-06, "loss": 0.7094, "step": 6788 }, { "epoch": 0.99, "grad_norm": 5.672766208648682, "learning_rate": 1.5869397799512193e-06, "loss": 0.7198, "step": 6789 }, { "epoch": 0.99, "grad_norm": 5.880365371704102, "learning_rate": 1.5868116221802374e-06, "loss": 0.762, "step": 6790 }, { "epoch": 0.99, "grad_norm": 5.104866027832031, "learning_rate": 1.5866834497076435e-06, "loss": 0.6345, "step": 6791 }, { "epoch": 0.99, "grad_norm": 6.6560139656066895, "learning_rate": 1.5865552625366495e-06, "loss": 0.8941, "step": 6792 }, { "epoch": 0.99, "grad_norm": 6.026875972747803, "learning_rate": 1.586427060670466e-06, "loss": 0.6679, "step": 6793 }, { "epoch": 0.99, "grad_norm": 5.305830955505371, "learning_rate": 1.5862988441123056e-06, "loss": 0.6778, "step": 6794 }, { "epoch": 0.99, "grad_norm": 5.771761417388916, "learning_rate": 1.58617061286538e-06, "loss": 0.739, "step": 6795 }, { "epoch": 0.99, "grad_norm": 5.982187747955322, "learning_rate": 1.586042366932902e-06, "loss": 0.7381, "step": 6796 }, { "epoch": 0.99, "grad_norm": 5.974911212921143, "learning_rate": 1.5859141063180853e-06, "loss": 0.6757, "step": 6797 }, { "epoch": 0.99, "grad_norm": 5.705862045288086, "learning_rate": 1.5857858310241422e-06, "loss": 0.6993, "step": 6798 }, { "epoch": 0.99, "grad_norm": 6.186208724975586, "learning_rate": 1.5856575410542871e-06, "loss": 0.7461, "step": 6799 }, { "epoch": 0.99, "grad_norm": 6.065007209777832, "learning_rate": 1.5855292364117339e-06, "loss": 0.6368, "step": 6800 }, { "epoch": 0.99, "grad_norm": 5.646978378295898, "learning_rate": 1.585400917099697e-06, "loss": 0.7062, "step": 6801 }, { "epoch": 0.99, "grad_norm": 5.352683067321777, "learning_rate": 1.5852725831213913e-06, "loss": 0.6809, "step": 6802 }, { "epoch": 0.99, "grad_norm": 5.758120059967041, "learning_rate": 1.5851442344800322e-06, "loss": 0.6986, "step": 6803 }, { "epoch": 0.99, "grad_norm": 5.688066482543945, "learning_rate": 1.585015871178835e-06, "loss": 0.7174, "step": 6804 }, { "epoch": 0.99, "grad_norm": 5.224372863769531, "learning_rate": 1.5848874932210154e-06, "loss": 0.6856, "step": 6805 }, { "epoch": 0.99, "grad_norm": 5.911086082458496, "learning_rate": 1.5847591006097902e-06, "loss": 0.7156, "step": 6806 }, { "epoch": 0.99, "grad_norm": 4.757909297943115, "learning_rate": 1.5846306933483763e-06, "loss": 0.6671, "step": 6807 }, { "epoch": 0.99, "grad_norm": 5.350156784057617, "learning_rate": 1.5845022714399897e-06, "loss": 0.7506, "step": 6808 }, { "epoch": 0.99, "grad_norm": 5.628818988800049, "learning_rate": 1.5843738348878483e-06, "loss": 0.734, "step": 6809 }, { "epoch": 0.99, "grad_norm": 4.867550849914551, "learning_rate": 1.5842453836951703e-06, "loss": 0.6695, "step": 6810 }, { "epoch": 0.99, "grad_norm": 5.241937160491943, "learning_rate": 1.5841169178651737e-06, "loss": 0.7048, "step": 6811 }, { "epoch": 0.99, "grad_norm": 5.325127124786377, "learning_rate": 1.5839884374010765e-06, "loss": 0.6357, "step": 6812 }, { "epoch": 0.99, "grad_norm": 6.805673599243164, "learning_rate": 1.583859942306098e-06, "loss": 0.7906, "step": 6813 }, { "epoch": 0.99, "grad_norm": 5.3547868728637695, "learning_rate": 1.5837314325834571e-06, "loss": 0.7418, "step": 6814 }, { "epoch": 0.99, "grad_norm": 5.7078776359558105, "learning_rate": 1.583602908236374e-06, "loss": 0.7092, "step": 6815 }, { "epoch": 0.99, "grad_norm": 5.243204116821289, "learning_rate": 1.5834743692680682e-06, "loss": 0.6249, "step": 6816 }, { "epoch": 0.99, "grad_norm": 5.677181243896484, "learning_rate": 1.5833458156817599e-06, "loss": 0.8285, "step": 6817 }, { "epoch": 0.99, "grad_norm": 5.2813191413879395, "learning_rate": 1.5832172474806702e-06, "loss": 0.6434, "step": 6818 }, { "epoch": 0.99, "grad_norm": 6.282449245452881, "learning_rate": 1.5830886646680198e-06, "loss": 0.7085, "step": 6819 }, { "epoch": 0.99, "grad_norm": 5.760742664337158, "learning_rate": 1.5829600672470304e-06, "loss": 0.7242, "step": 6820 }, { "epoch": 0.99, "grad_norm": 5.277778625488281, "learning_rate": 1.5828314552209242e-06, "loss": 0.6689, "step": 6821 }, { "epoch": 0.99, "grad_norm": 5.709842205047607, "learning_rate": 1.582702828592922e-06, "loss": 0.7784, "step": 6822 }, { "epoch": 0.99, "grad_norm": 5.858206272125244, "learning_rate": 1.582574187366248e-06, "loss": 0.6618, "step": 6823 }, { "epoch": 0.99, "grad_norm": 6.434065818786621, "learning_rate": 1.582445531544124e-06, "loss": 0.7163, "step": 6824 }, { "epoch": 0.99, "grad_norm": 5.2617411613464355, "learning_rate": 1.5823168611297734e-06, "loss": 0.6293, "step": 6825 }, { "epoch": 0.99, "grad_norm": 5.3277435302734375, "learning_rate": 1.5821881761264205e-06, "loss": 0.635, "step": 6826 }, { "epoch": 0.99, "grad_norm": 5.537979602813721, "learning_rate": 1.5820594765372883e-06, "loss": 0.7148, "step": 6827 }, { "epoch": 0.99, "grad_norm": 5.6728973388671875, "learning_rate": 1.581930762365602e-06, "loss": 0.6601, "step": 6828 }, { "epoch": 0.99, "grad_norm": 5.644803047180176, "learning_rate": 1.5818020336145858e-06, "loss": 0.7466, "step": 6829 }, { "epoch": 0.99, "grad_norm": 5.3768391609191895, "learning_rate": 1.581673290287465e-06, "loss": 0.741, "step": 6830 }, { "epoch": 0.99, "grad_norm": 5.648855209350586, "learning_rate": 1.5815445323874653e-06, "loss": 0.7359, "step": 6831 }, { "epoch": 0.99, "grad_norm": 6.036427021026611, "learning_rate": 1.581415759917812e-06, "loss": 0.6781, "step": 6832 }, { "epoch": 0.99, "grad_norm": 6.100082874298096, "learning_rate": 1.5812869728817316e-06, "loss": 0.7574, "step": 6833 }, { "epoch": 0.99, "grad_norm": 4.9448723793029785, "learning_rate": 1.5811581712824505e-06, "loss": 0.6615, "step": 6834 }, { "epoch": 0.99, "grad_norm": 4.987742900848389, "learning_rate": 1.5810293551231957e-06, "loss": 0.7195, "step": 6835 }, { "epoch": 0.99, "grad_norm": 5.3431806564331055, "learning_rate": 1.5809005244071948e-06, "loss": 0.6733, "step": 6836 }, { "epoch": 0.99, "grad_norm": 5.668935298919678, "learning_rate": 1.5807716791376747e-06, "loss": 0.7217, "step": 6837 }, { "epoch": 0.99, "grad_norm": 5.359765529632568, "learning_rate": 1.5806428193178639e-06, "loss": 0.6618, "step": 6838 }, { "epoch": 0.99, "grad_norm": 5.252436637878418, "learning_rate": 1.5805139449509907e-06, "loss": 0.642, "step": 6839 }, { "epoch": 0.99, "grad_norm": 5.623973369598389, "learning_rate": 1.580385056040284e-06, "loss": 0.7134, "step": 6840 }, { "epoch": 0.99, "grad_norm": 6.313292503356934, "learning_rate": 1.5802561525889728e-06, "loss": 0.753, "step": 6841 }, { "epoch": 0.99, "grad_norm": 5.550934314727783, "learning_rate": 1.5801272346002863e-06, "loss": 0.6559, "step": 6842 }, { "epoch": 0.99, "grad_norm": 5.729117393493652, "learning_rate": 1.5799983020774547e-06, "loss": 0.7553, "step": 6843 }, { "epoch": 0.99, "grad_norm": 6.009291172027588, "learning_rate": 1.579869355023708e-06, "loss": 0.7589, "step": 6844 }, { "epoch": 0.99, "grad_norm": 5.503437519073486, "learning_rate": 1.5797403934422765e-06, "loss": 0.734, "step": 6845 }, { "epoch": 0.99, "grad_norm": 6.156014442443848, "learning_rate": 1.5796114173363918e-06, "loss": 0.6125, "step": 6846 }, { "epoch": 0.99, "grad_norm": 5.879505634307861, "learning_rate": 1.579482426709285e-06, "loss": 0.7008, "step": 6847 }, { "epoch": 0.99, "grad_norm": 5.337815761566162, "learning_rate": 1.5793534215641872e-06, "loss": 0.6746, "step": 6848 }, { "epoch": 0.99, "grad_norm": 5.547779560089111, "learning_rate": 1.5792244019043307e-06, "loss": 0.7873, "step": 6849 }, { "epoch": 0.99, "grad_norm": 4.660458564758301, "learning_rate": 1.579095367732948e-06, "loss": 0.6406, "step": 6850 }, { "epoch": 0.99, "grad_norm": 5.324591159820557, "learning_rate": 1.578966319053272e-06, "loss": 0.684, "step": 6851 }, { "epoch": 0.99, "grad_norm": 5.839733600616455, "learning_rate": 1.5788372558685357e-06, "loss": 0.7504, "step": 6852 }, { "epoch": 0.99, "grad_norm": 6.002075672149658, "learning_rate": 1.578708178181972e-06, "loss": 0.7215, "step": 6853 }, { "epoch": 0.99, "grad_norm": 5.989418029785156, "learning_rate": 1.5785790859968155e-06, "loss": 0.7088, "step": 6854 }, { "epoch": 0.99, "grad_norm": 6.498393535614014, "learning_rate": 1.5784499793163002e-06, "loss": 0.8507, "step": 6855 }, { "epoch": 0.99, "grad_norm": 4.954578399658203, "learning_rate": 1.5783208581436602e-06, "loss": 0.6706, "step": 6856 }, { "epoch": 0.99, "grad_norm": 5.442477703094482, "learning_rate": 1.578191722482131e-06, "loss": 0.7117, "step": 6857 }, { "epoch": 1.0, "grad_norm": 5.867734909057617, "learning_rate": 1.5780625723349477e-06, "loss": 0.7515, "step": 6858 }, { "epoch": 1.0, "grad_norm": 5.289011001586914, "learning_rate": 1.5779334077053458e-06, "loss": 0.6443, "step": 6859 }, { "epoch": 1.0, "grad_norm": 5.655259132385254, "learning_rate": 1.5778042285965615e-06, "loss": 0.8518, "step": 6860 }, { "epoch": 1.0, "grad_norm": 4.818017959594727, "learning_rate": 1.577675035011831e-06, "loss": 0.6617, "step": 6861 }, { "epoch": 1.0, "grad_norm": 5.1594696044921875, "learning_rate": 1.5775458269543913e-06, "loss": 0.6948, "step": 6862 }, { "epoch": 1.0, "grad_norm": 5.079858779907227, "learning_rate": 1.5774166044274791e-06, "loss": 0.6891, "step": 6863 }, { "epoch": 1.0, "grad_norm": 5.769890785217285, "learning_rate": 1.5772873674343322e-06, "loss": 0.6824, "step": 6864 }, { "epoch": 1.0, "grad_norm": 5.157453536987305, "learning_rate": 1.577158115978188e-06, "loss": 0.6701, "step": 6865 }, { "epoch": 1.0, "grad_norm": 5.37011194229126, "learning_rate": 1.5770288500622854e-06, "loss": 0.6914, "step": 6866 }, { "epoch": 1.0, "grad_norm": 5.080226898193359, "learning_rate": 1.5768995696898623e-06, "loss": 0.6498, "step": 6867 }, { "epoch": 1.0, "grad_norm": 5.461238384246826, "learning_rate": 1.576770274864158e-06, "loss": 0.6553, "step": 6868 }, { "epoch": 1.0, "grad_norm": 5.791776657104492, "learning_rate": 1.5766409655884117e-06, "loss": 0.7572, "step": 6869 }, { "epoch": 1.0, "grad_norm": 5.938854217529297, "learning_rate": 1.5765116418658625e-06, "loss": 0.6787, "step": 6870 }, { "epoch": 1.0, "grad_norm": 5.641560077667236, "learning_rate": 1.5763823036997512e-06, "loss": 0.6155, "step": 6871 }, { "epoch": 1.0, "grad_norm": 5.3728718757629395, "learning_rate": 1.5762529510933178e-06, "loss": 0.6948, "step": 6872 }, { "epoch": 1.0, "grad_norm": 4.862461566925049, "learning_rate": 1.576123584049803e-06, "loss": 0.6203, "step": 6873 }, { "epoch": 1.0, "grad_norm": 5.424590110778809, "learning_rate": 1.5759942025724477e-06, "loss": 0.6283, "step": 6874 }, { "epoch": 1.0, "grad_norm": 5.074213981628418, "learning_rate": 1.5758648066644938e-06, "loss": 0.6617, "step": 6875 }, { "epoch": 1.0, "grad_norm": 5.510207176208496, "learning_rate": 1.5757353963291827e-06, "loss": 0.6486, "step": 6876 }, { "epoch": 1.0, "grad_norm": 5.305023670196533, "learning_rate": 1.5756059715697567e-06, "loss": 0.6734, "step": 6877 }, { "epoch": 1.0, "grad_norm": 5.660096168518066, "learning_rate": 1.5754765323894583e-06, "loss": 0.7199, "step": 6878 }, { "epoch": 1.0, "grad_norm": 5.611563682556152, "learning_rate": 1.5753470787915308e-06, "loss": 0.7222, "step": 6879 }, { "epoch": 1.0, "grad_norm": 5.025609016418457, "learning_rate": 1.5752176107792168e-06, "loss": 0.6146, "step": 6880 }, { "epoch": 1.0, "grad_norm": 5.629481792449951, "learning_rate": 1.5750881283557603e-06, "loss": 0.7933, "step": 6881 }, { "epoch": 1.0, "grad_norm": 5.682461261749268, "learning_rate": 1.574958631524405e-06, "loss": 0.7295, "step": 6882 }, { "epoch": 1.0, "grad_norm": 5.894015312194824, "learning_rate": 1.5748291202883954e-06, "loss": 0.7349, "step": 6883 }, { "epoch": 1.0, "grad_norm": 5.520208835601807, "learning_rate": 1.5746995946509763e-06, "loss": 0.5938, "step": 6884 }, { "epoch": 1.0, "grad_norm": 5.240843772888184, "learning_rate": 1.5745700546153927e-06, "loss": 0.6232, "step": 6885 }, { "epoch": 1.0, "grad_norm": 4.965578079223633, "learning_rate": 1.57444050018489e-06, "loss": 0.6981, "step": 6886 }, { "epoch": 1.0, "grad_norm": 5.493435382843018, "learning_rate": 1.5743109313627135e-06, "loss": 0.6429, "step": 6887 }, { "epoch": 1.0, "grad_norm": 6.781231880187988, "learning_rate": 1.5741813481521104e-06, "loss": 0.6779, "step": 6888 }, { "epoch": 1.0, "grad_norm": 5.827303886413574, "learning_rate": 1.5740517505563261e-06, "loss": 0.7887, "step": 6889 }, { "epoch": 1.0, "grad_norm": 6.729274272918701, "learning_rate": 1.5739221385786081e-06, "loss": 0.6829, "step": 6890 }, { "epoch": 1.0, "grad_norm": 4.830775260925293, "learning_rate": 1.5737925122222032e-06, "loss": 0.6485, "step": 6891 }, { "epoch": 1.0, "grad_norm": 5.681775093078613, "learning_rate": 1.5736628714903596e-06, "loss": 0.7795, "step": 6892 }, { "epoch": 1.0, "grad_norm": 5.850132465362549, "learning_rate": 1.5735332163863248e-06, "loss": 0.6566, "step": 6893 }, { "epoch": 1.0, "grad_norm": 5.599728584289551, "learning_rate": 1.5734035469133471e-06, "loss": 0.5424, "step": 6894 }, { "epoch": 1.0, "grad_norm": 5.856912136077881, "learning_rate": 1.573273863074675e-06, "loss": 0.5255, "step": 6895 }, { "epoch": 1.0, "grad_norm": 5.377164363861084, "learning_rate": 1.573144164873558e-06, "loss": 0.5506, "step": 6896 }, { "epoch": 1.0, "grad_norm": 5.0423431396484375, "learning_rate": 1.5730144523132452e-06, "loss": 0.5598, "step": 6897 }, { "epoch": 1.0, "grad_norm": 5.039190292358398, "learning_rate": 1.5728847253969863e-06, "loss": 0.5498, "step": 6898 }, { "epoch": 1.0, "grad_norm": 5.017648696899414, "learning_rate": 1.5727549841280313e-06, "loss": 0.5903, "step": 6899 }, { "epoch": 1.0, "grad_norm": 5.734566688537598, "learning_rate": 1.5726252285096312e-06, "loss": 0.5491, "step": 6900 }, { "epoch": 1.0, "grad_norm": 5.409719467163086, "learning_rate": 1.5724954585450359e-06, "loss": 0.5888, "step": 6901 }, { "epoch": 1.0, "grad_norm": 5.954215049743652, "learning_rate": 1.5723656742374974e-06, "loss": 0.5888, "step": 6902 }, { "epoch": 1.0, "grad_norm": 6.35433292388916, "learning_rate": 1.5722358755902667e-06, "loss": 0.5567, "step": 6903 }, { "epoch": 1.0, "grad_norm": 6.225040435791016, "learning_rate": 1.572106062606596e-06, "loss": 0.5782, "step": 6904 }, { "epoch": 1.0, "grad_norm": 6.2321906089782715, "learning_rate": 1.5719762352897376e-06, "loss": 0.5276, "step": 6905 }, { "epoch": 1.0, "grad_norm": 6.226898193359375, "learning_rate": 1.571846393642944e-06, "loss": 0.6143, "step": 6906 }, { "epoch": 1.0, "grad_norm": 6.387150764465332, "learning_rate": 1.571716537669468e-06, "loss": 0.5742, "step": 6907 }, { "epoch": 1.0, "grad_norm": 7.447854518890381, "learning_rate": 1.5715866673725634e-06, "loss": 0.6268, "step": 6908 }, { "epoch": 1.0, "grad_norm": 6.033560752868652, "learning_rate": 1.5714567827554833e-06, "loss": 0.5976, "step": 6909 }, { "epoch": 1.0, "grad_norm": 6.859652042388916, "learning_rate": 1.571326883821482e-06, "loss": 0.5755, "step": 6910 }, { "epoch": 1.0, "grad_norm": 5.610099792480469, "learning_rate": 1.5711969705738141e-06, "loss": 0.6147, "step": 6911 }, { "epoch": 1.0, "grad_norm": 6.364184379577637, "learning_rate": 1.5710670430157341e-06, "loss": 0.5355, "step": 6912 }, { "epoch": 1.0, "grad_norm": 5.859347820281982, "learning_rate": 1.5709371011504973e-06, "loss": 0.5603, "step": 6913 }, { "epoch": 1.0, "grad_norm": 6.564237594604492, "learning_rate": 1.5708071449813592e-06, "loss": 0.4729, "step": 6914 }, { "epoch": 1.0, "grad_norm": 5.534538269042969, "learning_rate": 1.5706771745115751e-06, "loss": 0.5398, "step": 6915 }, { "epoch": 1.0, "grad_norm": 5.983188152313232, "learning_rate": 1.5705471897444022e-06, "loss": 0.6393, "step": 6916 }, { "epoch": 1.0, "grad_norm": 5.4285993576049805, "learning_rate": 1.5704171906830962e-06, "loss": 0.5381, "step": 6917 }, { "epoch": 1.0, "grad_norm": 5.462253570556641, "learning_rate": 1.5702871773309144e-06, "loss": 0.5954, "step": 6918 }, { "epoch": 1.0, "grad_norm": 5.162570953369141, "learning_rate": 1.5701571496911142e-06, "loss": 0.4888, "step": 6919 }, { "epoch": 1.0, "grad_norm": 5.556735038757324, "learning_rate": 1.5700271077669527e-06, "loss": 0.4844, "step": 6920 }, { "epoch": 1.0, "grad_norm": 5.719781398773193, "learning_rate": 1.5698970515616883e-06, "loss": 0.5888, "step": 6921 }, { "epoch": 1.0, "grad_norm": 5.8775763511657715, "learning_rate": 1.5697669810785792e-06, "loss": 0.6248, "step": 6922 }, { "epoch": 1.0, "grad_norm": 5.962459564208984, "learning_rate": 1.5696368963208842e-06, "loss": 0.5703, "step": 6923 }, { "epoch": 1.0, "grad_norm": 6.310135364532471, "learning_rate": 1.5695067972918623e-06, "loss": 0.5514, "step": 6924 }, { "epoch": 1.0, "grad_norm": 5.764404296875, "learning_rate": 1.5693766839947728e-06, "loss": 0.5228, "step": 6925 }, { "epoch": 1.0, "grad_norm": 5.27529764175415, "learning_rate": 1.569246556432876e-06, "loss": 0.5365, "step": 6926 }, { "epoch": 1.01, "grad_norm": 6.246670722961426, "learning_rate": 1.5691164146094313e-06, "loss": 0.6186, "step": 6927 }, { "epoch": 1.01, "grad_norm": 6.118890285491943, "learning_rate": 1.5689862585276999e-06, "loss": 0.5928, "step": 6928 }, { "epoch": 1.01, "grad_norm": 5.66864013671875, "learning_rate": 1.568856088190942e-06, "loss": 0.5124, "step": 6929 }, { "epoch": 1.01, "grad_norm": 6.03685188293457, "learning_rate": 1.568725903602419e-06, "loss": 0.578, "step": 6930 }, { "epoch": 1.01, "grad_norm": 6.188803672790527, "learning_rate": 1.5685957047653931e-06, "loss": 0.6165, "step": 6931 }, { "epoch": 1.01, "grad_norm": 5.944730281829834, "learning_rate": 1.5684654916831255e-06, "loss": 0.5563, "step": 6932 }, { "epoch": 1.01, "grad_norm": 6.021871566772461, "learning_rate": 1.5683352643588784e-06, "loss": 0.5851, "step": 6933 }, { "epoch": 1.01, "grad_norm": 5.298027515411377, "learning_rate": 1.5682050227959148e-06, "loss": 0.4884, "step": 6934 }, { "epoch": 1.01, "grad_norm": 5.561127662658691, "learning_rate": 1.5680747669974976e-06, "loss": 0.5424, "step": 6935 }, { "epoch": 1.01, "grad_norm": 5.718303203582764, "learning_rate": 1.5679444969668903e-06, "loss": 0.4657, "step": 6936 }, { "epoch": 1.01, "grad_norm": 6.006762981414795, "learning_rate": 1.5678142127073563e-06, "loss": 0.5461, "step": 6937 }, { "epoch": 1.01, "grad_norm": 5.222312927246094, "learning_rate": 1.56768391422216e-06, "loss": 0.5435, "step": 6938 }, { "epoch": 1.01, "grad_norm": 6.247830867767334, "learning_rate": 1.5675536015145655e-06, "loss": 0.4568, "step": 6939 }, { "epoch": 1.01, "grad_norm": 5.344241619110107, "learning_rate": 1.5674232745878372e-06, "loss": 0.5116, "step": 6940 }, { "epoch": 1.01, "grad_norm": 5.562955856323242, "learning_rate": 1.567292933445241e-06, "loss": 0.5984, "step": 6941 }, { "epoch": 1.01, "grad_norm": 6.245638847351074, "learning_rate": 1.5671625780900426e-06, "loss": 0.4859, "step": 6942 }, { "epoch": 1.01, "grad_norm": 6.7353057861328125, "learning_rate": 1.567032208525507e-06, "loss": 0.5276, "step": 6943 }, { "epoch": 1.01, "grad_norm": 5.454596996307373, "learning_rate": 1.5669018247549008e-06, "loss": 0.5263, "step": 6944 }, { "epoch": 1.01, "grad_norm": 5.441352844238281, "learning_rate": 1.5667714267814903e-06, "loss": 0.5684, "step": 6945 }, { "epoch": 1.01, "grad_norm": 6.587043285369873, "learning_rate": 1.5666410146085429e-06, "loss": 0.5995, "step": 6946 }, { "epoch": 1.01, "grad_norm": 6.229189395904541, "learning_rate": 1.5665105882393252e-06, "loss": 0.5609, "step": 6947 }, { "epoch": 1.01, "grad_norm": 6.210641384124756, "learning_rate": 1.5663801476771057e-06, "loss": 0.5082, "step": 6948 }, { "epoch": 1.01, "grad_norm": 6.412014007568359, "learning_rate": 1.5662496929251514e-06, "loss": 0.5715, "step": 6949 }, { "epoch": 1.01, "grad_norm": 5.829818248748779, "learning_rate": 1.5661192239867314e-06, "loss": 0.5563, "step": 6950 }, { "epoch": 1.01, "grad_norm": 7.16384744644165, "learning_rate": 1.565988740865114e-06, "loss": 0.6219, "step": 6951 }, { "epoch": 1.01, "grad_norm": 6.716761589050293, "learning_rate": 1.5658582435635682e-06, "loss": 0.5271, "step": 6952 }, { "epoch": 1.01, "grad_norm": 6.047248363494873, "learning_rate": 1.5657277320853637e-06, "loss": 0.5856, "step": 6953 }, { "epoch": 1.01, "grad_norm": 5.966394901275635, "learning_rate": 1.56559720643377e-06, "loss": 0.5031, "step": 6954 }, { "epoch": 1.01, "grad_norm": 6.121930122375488, "learning_rate": 1.5654666666120572e-06, "loss": 0.5528, "step": 6955 }, { "epoch": 1.01, "grad_norm": 5.818629264831543, "learning_rate": 1.565336112623496e-06, "loss": 0.4715, "step": 6956 }, { "epoch": 1.01, "grad_norm": 5.964868545532227, "learning_rate": 1.565205544471357e-06, "loss": 0.4865, "step": 6957 }, { "epoch": 1.01, "grad_norm": 6.386423587799072, "learning_rate": 1.5650749621589117e-06, "loss": 0.4937, "step": 6958 }, { "epoch": 1.01, "grad_norm": 5.869820594787598, "learning_rate": 1.5649443656894312e-06, "loss": 0.6756, "step": 6959 }, { "epoch": 1.01, "grad_norm": 5.996466636657715, "learning_rate": 1.5648137550661872e-06, "loss": 0.5626, "step": 6960 }, { "epoch": 1.01, "grad_norm": 6.033593654632568, "learning_rate": 1.5646831302924528e-06, "loss": 0.5058, "step": 6961 }, { "epoch": 1.01, "grad_norm": 5.674510955810547, "learning_rate": 1.5645524913714997e-06, "loss": 0.5397, "step": 6962 }, { "epoch": 1.01, "grad_norm": 5.217618942260742, "learning_rate": 1.5644218383066016e-06, "loss": 0.5372, "step": 6963 }, { "epoch": 1.01, "grad_norm": 6.135904312133789, "learning_rate": 1.5642911711010314e-06, "loss": 0.5556, "step": 6964 }, { "epoch": 1.01, "grad_norm": 5.766002178192139, "learning_rate": 1.5641604897580626e-06, "loss": 0.5755, "step": 6965 }, { "epoch": 1.01, "grad_norm": 5.659695148468018, "learning_rate": 1.5640297942809696e-06, "loss": 0.5581, "step": 6966 }, { "epoch": 1.01, "grad_norm": 6.2026214599609375, "learning_rate": 1.5638990846730262e-06, "loss": 0.5963, "step": 6967 }, { "epoch": 1.01, "grad_norm": 6.4854888916015625, "learning_rate": 1.5637683609375079e-06, "loss": 0.6104, "step": 6968 }, { "epoch": 1.01, "grad_norm": 6.0435028076171875, "learning_rate": 1.563637623077689e-06, "loss": 0.5727, "step": 6969 }, { "epoch": 1.01, "grad_norm": 5.592912673950195, "learning_rate": 1.5635068710968453e-06, "loss": 0.5545, "step": 6970 }, { "epoch": 1.01, "grad_norm": 5.854554176330566, "learning_rate": 1.5633761049982525e-06, "loss": 0.579, "step": 6971 }, { "epoch": 1.01, "grad_norm": 6.302458763122559, "learning_rate": 1.563245324785187e-06, "loss": 0.5208, "step": 6972 }, { "epoch": 1.01, "grad_norm": 5.782331466674805, "learning_rate": 1.563114530460925e-06, "loss": 0.5297, "step": 6973 }, { "epoch": 1.01, "grad_norm": 5.666762351989746, "learning_rate": 1.5629837220287434e-06, "loss": 0.5136, "step": 6974 }, { "epoch": 1.01, "grad_norm": 6.499039173126221, "learning_rate": 1.5628528994919195e-06, "loss": 0.5984, "step": 6975 }, { "epoch": 1.01, "grad_norm": 6.219986438751221, "learning_rate": 1.5627220628537305e-06, "loss": 0.5488, "step": 6976 }, { "epoch": 1.01, "grad_norm": 6.527142524719238, "learning_rate": 1.5625912121174546e-06, "loss": 0.6357, "step": 6977 }, { "epoch": 1.01, "grad_norm": 5.760953426361084, "learning_rate": 1.5624603472863703e-06, "loss": 0.5756, "step": 6978 }, { "epoch": 1.01, "grad_norm": 5.8094987869262695, "learning_rate": 1.5623294683637554e-06, "loss": 0.5465, "step": 6979 }, { "epoch": 1.01, "grad_norm": 5.800323009490967, "learning_rate": 1.5621985753528897e-06, "loss": 0.5429, "step": 6980 }, { "epoch": 1.01, "grad_norm": 5.7619147300720215, "learning_rate": 1.5620676682570521e-06, "loss": 0.4831, "step": 6981 }, { "epoch": 1.01, "grad_norm": 5.727487087249756, "learning_rate": 1.5619367470795222e-06, "loss": 0.5391, "step": 6982 }, { "epoch": 1.01, "grad_norm": 5.684389591217041, "learning_rate": 1.5618058118235803e-06, "loss": 0.5746, "step": 6983 }, { "epoch": 1.01, "grad_norm": 5.349471092224121, "learning_rate": 1.5616748624925068e-06, "loss": 0.5095, "step": 6984 }, { "epoch": 1.01, "grad_norm": 6.241159915924072, "learning_rate": 1.5615438990895817e-06, "loss": 0.5983, "step": 6985 }, { "epoch": 1.01, "grad_norm": 6.097652912139893, "learning_rate": 1.561412921618087e-06, "loss": 0.4773, "step": 6986 }, { "epoch": 1.01, "grad_norm": 5.793916702270508, "learning_rate": 1.561281930081304e-06, "loss": 0.5667, "step": 6987 }, { "epoch": 1.01, "grad_norm": 5.8746538162231445, "learning_rate": 1.5611509244825138e-06, "loss": 0.4744, "step": 6988 }, { "epoch": 1.01, "grad_norm": 5.693210124969482, "learning_rate": 1.5610199048249992e-06, "loss": 0.5346, "step": 6989 }, { "epoch": 1.01, "grad_norm": 5.9385809898376465, "learning_rate": 1.5608888711120423e-06, "loss": 0.5177, "step": 6990 }, { "epoch": 1.01, "grad_norm": 5.524481296539307, "learning_rate": 1.5607578233469262e-06, "loss": 0.5312, "step": 6991 }, { "epoch": 1.01, "grad_norm": 5.986470699310303, "learning_rate": 1.5606267615329338e-06, "loss": 0.5088, "step": 6992 }, { "epoch": 1.01, "grad_norm": 5.602522373199463, "learning_rate": 1.5604956856733489e-06, "loss": 0.5576, "step": 6993 }, { "epoch": 1.01, "grad_norm": 6.10554838180542, "learning_rate": 1.5603645957714553e-06, "loss": 0.4815, "step": 6994 }, { "epoch": 1.01, "grad_norm": 6.433117866516113, "learning_rate": 1.5602334918305374e-06, "loss": 0.5922, "step": 6995 }, { "epoch": 1.02, "grad_norm": 6.0701422691345215, "learning_rate": 1.5601023738538794e-06, "loss": 0.5488, "step": 6996 }, { "epoch": 1.02, "grad_norm": 5.6781535148620605, "learning_rate": 1.5599712418447668e-06, "loss": 0.5298, "step": 6997 }, { "epoch": 1.02, "grad_norm": 5.850528717041016, "learning_rate": 1.5598400958064844e-06, "loss": 0.512, "step": 6998 }, { "epoch": 1.02, "grad_norm": 6.372696399688721, "learning_rate": 1.5597089357423178e-06, "loss": 0.5326, "step": 6999 }, { "epoch": 1.02, "grad_norm": 6.268696308135986, "learning_rate": 1.5595777616555537e-06, "loss": 0.5152, "step": 7000 }, { "epoch": 1.02, "grad_norm": 5.736248016357422, "learning_rate": 1.5594465735494778e-06, "loss": 0.5181, "step": 7001 }, { "epoch": 1.02, "grad_norm": 5.981385707855225, "learning_rate": 1.559315371427377e-06, "loss": 0.4749, "step": 7002 }, { "epoch": 1.02, "grad_norm": 5.762113571166992, "learning_rate": 1.5591841552925383e-06, "loss": 0.5348, "step": 7003 }, { "epoch": 1.02, "grad_norm": 6.030233860015869, "learning_rate": 1.5590529251482495e-06, "loss": 0.5931, "step": 7004 }, { "epoch": 1.02, "grad_norm": 6.4276299476623535, "learning_rate": 1.5589216809977975e-06, "loss": 0.5077, "step": 7005 }, { "epoch": 1.02, "grad_norm": 6.102529048919678, "learning_rate": 1.558790422844471e-06, "loss": 0.5809, "step": 7006 }, { "epoch": 1.02, "grad_norm": 5.760941982269287, "learning_rate": 1.5586591506915588e-06, "loss": 0.5641, "step": 7007 }, { "epoch": 1.02, "grad_norm": 6.311315536499023, "learning_rate": 1.558527864542349e-06, "loss": 0.5544, "step": 7008 }, { "epoch": 1.02, "grad_norm": 5.629438877105713, "learning_rate": 1.5583965644001312e-06, "loss": 0.5826, "step": 7009 }, { "epoch": 1.02, "grad_norm": 5.178065299987793, "learning_rate": 1.5582652502681945e-06, "loss": 0.4978, "step": 7010 }, { "epoch": 1.02, "grad_norm": 6.589809417724609, "learning_rate": 1.5581339221498295e-06, "loss": 0.5738, "step": 7011 }, { "epoch": 1.02, "grad_norm": 5.768608093261719, "learning_rate": 1.5580025800483258e-06, "loss": 0.4923, "step": 7012 }, { "epoch": 1.02, "grad_norm": 6.0707855224609375, "learning_rate": 1.5578712239669737e-06, "loss": 0.4729, "step": 7013 }, { "epoch": 1.02, "grad_norm": 5.4111433029174805, "learning_rate": 1.5577398539090652e-06, "loss": 0.5441, "step": 7014 }, { "epoch": 1.02, "grad_norm": 6.279445171356201, "learning_rate": 1.5576084698778904e-06, "loss": 0.603, "step": 7015 }, { "epoch": 1.02, "grad_norm": 5.741250991821289, "learning_rate": 1.5574770718767418e-06, "loss": 0.5062, "step": 7016 }, { "epoch": 1.02, "grad_norm": 5.813643455505371, "learning_rate": 1.5573456599089109e-06, "loss": 0.5272, "step": 7017 }, { "epoch": 1.02, "grad_norm": 5.980890274047852, "learning_rate": 1.5572142339776896e-06, "loss": 0.5558, "step": 7018 }, { "epoch": 1.02, "grad_norm": 6.3065409660339355, "learning_rate": 1.5570827940863715e-06, "loss": 0.538, "step": 7019 }, { "epoch": 1.02, "grad_norm": 6.025665283203125, "learning_rate": 1.5569513402382487e-06, "loss": 0.536, "step": 7020 }, { "epoch": 1.02, "grad_norm": 6.183714866638184, "learning_rate": 1.5568198724366158e-06, "loss": 0.5614, "step": 7021 }, { "epoch": 1.02, "grad_norm": 5.193768501281738, "learning_rate": 1.5566883906847652e-06, "loss": 0.4701, "step": 7022 }, { "epoch": 1.02, "grad_norm": 6.214874267578125, "learning_rate": 1.5565568949859914e-06, "loss": 0.506, "step": 7023 }, { "epoch": 1.02, "grad_norm": 5.20422887802124, "learning_rate": 1.556425385343589e-06, "loss": 0.4863, "step": 7024 }, { "epoch": 1.02, "grad_norm": 5.707637310028076, "learning_rate": 1.556293861760853e-06, "loss": 0.5494, "step": 7025 }, { "epoch": 1.02, "grad_norm": 5.982580184936523, "learning_rate": 1.556162324241078e-06, "loss": 0.5624, "step": 7026 }, { "epoch": 1.02, "grad_norm": 6.494805335998535, "learning_rate": 1.5560307727875594e-06, "loss": 0.5262, "step": 7027 }, { "epoch": 1.02, "grad_norm": 6.610616683959961, "learning_rate": 1.5558992074035934e-06, "loss": 0.5045, "step": 7028 }, { "epoch": 1.02, "grad_norm": 6.465534210205078, "learning_rate": 1.5557676280924762e-06, "loss": 0.5326, "step": 7029 }, { "epoch": 1.02, "grad_norm": 6.138591289520264, "learning_rate": 1.5556360348575038e-06, "loss": 0.5436, "step": 7030 }, { "epoch": 1.02, "grad_norm": 6.512828826904297, "learning_rate": 1.5555044277019733e-06, "loss": 0.5997, "step": 7031 }, { "epoch": 1.02, "grad_norm": 6.4233903884887695, "learning_rate": 1.5553728066291822e-06, "loss": 0.5343, "step": 7032 }, { "epoch": 1.02, "grad_norm": 6.168122291564941, "learning_rate": 1.5552411716424276e-06, "loss": 0.4954, "step": 7033 }, { "epoch": 1.02, "grad_norm": 6.282258033752441, "learning_rate": 1.555109522745008e-06, "loss": 0.5588, "step": 7034 }, { "epoch": 1.02, "grad_norm": 5.043739318847656, "learning_rate": 1.5549778599402207e-06, "loss": 0.5937, "step": 7035 }, { "epoch": 1.02, "grad_norm": 6.452272415161133, "learning_rate": 1.5548461832313656e-06, "loss": 0.5269, "step": 7036 }, { "epoch": 1.02, "grad_norm": 6.124050140380859, "learning_rate": 1.5547144926217404e-06, "loss": 0.579, "step": 7037 }, { "epoch": 1.02, "grad_norm": 6.0704216957092285, "learning_rate": 1.554582788114645e-06, "loss": 0.53, "step": 7038 }, { "epoch": 1.02, "grad_norm": 5.972492218017578, "learning_rate": 1.5544510697133787e-06, "loss": 0.5874, "step": 7039 }, { "epoch": 1.02, "grad_norm": 6.417275905609131, "learning_rate": 1.554319337421242e-06, "loss": 0.5445, "step": 7040 }, { "epoch": 1.02, "grad_norm": 5.814656734466553, "learning_rate": 1.554187591241535e-06, "loss": 0.4949, "step": 7041 }, { "epoch": 1.02, "grad_norm": 6.059376239776611, "learning_rate": 1.5540558311775583e-06, "loss": 0.5501, "step": 7042 }, { "epoch": 1.02, "grad_norm": 5.898001194000244, "learning_rate": 1.553924057232613e-06, "loss": 0.4446, "step": 7043 }, { "epoch": 1.02, "grad_norm": 5.407886505126953, "learning_rate": 1.5537922694100004e-06, "loss": 0.4609, "step": 7044 }, { "epoch": 1.02, "grad_norm": 7.362343788146973, "learning_rate": 1.5536604677130227e-06, "loss": 0.6089, "step": 7045 }, { "epoch": 1.02, "grad_norm": 6.342845916748047, "learning_rate": 1.5535286521449811e-06, "loss": 0.4909, "step": 7046 }, { "epoch": 1.02, "grad_norm": 6.218339920043945, "learning_rate": 1.553396822709179e-06, "loss": 0.4939, "step": 7047 }, { "epoch": 1.02, "grad_norm": 6.488755226135254, "learning_rate": 1.5532649794089182e-06, "loss": 0.5644, "step": 7048 }, { "epoch": 1.02, "grad_norm": 5.7914557456970215, "learning_rate": 1.5531331222475027e-06, "loss": 0.5838, "step": 7049 }, { "epoch": 1.02, "grad_norm": 6.3262715339660645, "learning_rate": 1.5530012512282351e-06, "loss": 0.5191, "step": 7050 }, { "epoch": 1.02, "grad_norm": 6.408331871032715, "learning_rate": 1.5528693663544202e-06, "loss": 0.5754, "step": 7051 }, { "epoch": 1.02, "grad_norm": 6.3345112800598145, "learning_rate": 1.5527374676293612e-06, "loss": 0.5126, "step": 7052 }, { "epoch": 1.02, "grad_norm": 6.309241771697998, "learning_rate": 1.5526055550563632e-06, "loss": 0.5741, "step": 7053 }, { "epoch": 1.02, "grad_norm": 5.866755962371826, "learning_rate": 1.5524736286387312e-06, "loss": 0.524, "step": 7054 }, { "epoch": 1.02, "grad_norm": 5.747471332550049, "learning_rate": 1.5523416883797697e-06, "loss": 0.4737, "step": 7055 }, { "epoch": 1.02, "grad_norm": 5.742164134979248, "learning_rate": 1.5522097342827846e-06, "loss": 0.5062, "step": 7056 }, { "epoch": 1.02, "grad_norm": 6.015220642089844, "learning_rate": 1.5520777663510822e-06, "loss": 0.5764, "step": 7057 }, { "epoch": 1.02, "grad_norm": 5.427418231964111, "learning_rate": 1.5519457845879681e-06, "loss": 0.5336, "step": 7058 }, { "epoch": 1.02, "grad_norm": 6.368966579437256, "learning_rate": 1.5518137889967494e-06, "loss": 0.5747, "step": 7059 }, { "epoch": 1.02, "grad_norm": 6.206367015838623, "learning_rate": 1.551681779580733e-06, "loss": 0.5564, "step": 7060 }, { "epoch": 1.02, "grad_norm": 6.381847381591797, "learning_rate": 1.5515497563432255e-06, "loss": 0.5344, "step": 7061 }, { "epoch": 1.02, "grad_norm": 6.765416145324707, "learning_rate": 1.5514177192875356e-06, "loss": 0.5716, "step": 7062 }, { "epoch": 1.02, "grad_norm": 6.471317291259766, "learning_rate": 1.5512856684169702e-06, "loss": 0.5588, "step": 7063 }, { "epoch": 1.02, "grad_norm": 6.567835330963135, "learning_rate": 1.5511536037348384e-06, "loss": 0.5748, "step": 7064 }, { "epoch": 1.03, "grad_norm": 6.536781311035156, "learning_rate": 1.5510215252444483e-06, "loss": 0.5727, "step": 7065 }, { "epoch": 1.03, "grad_norm": 6.726598739624023, "learning_rate": 1.5508894329491096e-06, "loss": 0.5448, "step": 7066 }, { "epoch": 1.03, "grad_norm": 6.36397647857666, "learning_rate": 1.550757326852131e-06, "loss": 0.4801, "step": 7067 }, { "epoch": 1.03, "grad_norm": 5.9161295890808105, "learning_rate": 1.5506252069568224e-06, "loss": 0.5178, "step": 7068 }, { "epoch": 1.03, "grad_norm": 9.485466003417969, "learning_rate": 1.5504930732664943e-06, "loss": 0.6209, "step": 7069 }, { "epoch": 1.03, "grad_norm": 6.71713399887085, "learning_rate": 1.5503609257844563e-06, "loss": 0.5632, "step": 7070 }, { "epoch": 1.03, "grad_norm": 6.440380573272705, "learning_rate": 1.5502287645140198e-06, "loss": 0.5674, "step": 7071 }, { "epoch": 1.03, "grad_norm": 5.673412799835205, "learning_rate": 1.5500965894584956e-06, "loss": 0.4788, "step": 7072 }, { "epoch": 1.03, "grad_norm": 6.876434803009033, "learning_rate": 1.549964400621195e-06, "loss": 0.5735, "step": 7073 }, { "epoch": 1.03, "grad_norm": 6.776725769042969, "learning_rate": 1.54983219800543e-06, "loss": 0.5657, "step": 7074 }, { "epoch": 1.03, "grad_norm": 7.181952476501465, "learning_rate": 1.5496999816145127e-06, "loss": 0.526, "step": 7075 }, { "epoch": 1.03, "grad_norm": 6.677334308624268, "learning_rate": 1.5495677514517555e-06, "loss": 0.5251, "step": 7076 }, { "epoch": 1.03, "grad_norm": 5.359808444976807, "learning_rate": 1.5494355075204713e-06, "loss": 0.5041, "step": 7077 }, { "epoch": 1.03, "grad_norm": 5.827429294586182, "learning_rate": 1.5493032498239733e-06, "loss": 0.5746, "step": 7078 }, { "epoch": 1.03, "grad_norm": 6.306874752044678, "learning_rate": 1.5491709783655749e-06, "loss": 0.586, "step": 7079 }, { "epoch": 1.03, "grad_norm": 6.468084335327148, "learning_rate": 1.5490386931485895e-06, "loss": 0.5739, "step": 7080 }, { "epoch": 1.03, "grad_norm": 5.791637420654297, "learning_rate": 1.5489063941763322e-06, "loss": 0.5733, "step": 7081 }, { "epoch": 1.03, "grad_norm": 6.135094165802002, "learning_rate": 1.5487740814521168e-06, "loss": 0.5469, "step": 7082 }, { "epoch": 1.03, "grad_norm": 6.040616989135742, "learning_rate": 1.5486417549792585e-06, "loss": 0.5028, "step": 7083 }, { "epoch": 1.03, "grad_norm": 5.42029333114624, "learning_rate": 1.5485094147610726e-06, "loss": 0.4957, "step": 7084 }, { "epoch": 1.03, "grad_norm": 6.192620754241943, "learning_rate": 1.5483770608008744e-06, "loss": 0.5393, "step": 7085 }, { "epoch": 1.03, "grad_norm": 6.10693359375, "learning_rate": 1.54824469310198e-06, "loss": 0.5334, "step": 7086 }, { "epoch": 1.03, "grad_norm": 5.964653015136719, "learning_rate": 1.548112311667706e-06, "loss": 0.5649, "step": 7087 }, { "epoch": 1.03, "grad_norm": 5.368741035461426, "learning_rate": 1.5479799165013684e-06, "loss": 0.526, "step": 7088 }, { "epoch": 1.03, "grad_norm": 6.679201126098633, "learning_rate": 1.547847507606284e-06, "loss": 0.5951, "step": 7089 }, { "epoch": 1.03, "grad_norm": 6.573089122772217, "learning_rate": 1.5477150849857707e-06, "loss": 0.5959, "step": 7090 }, { "epoch": 1.03, "grad_norm": 6.118683815002441, "learning_rate": 1.547582648643146e-06, "loss": 0.5082, "step": 7091 }, { "epoch": 1.03, "grad_norm": 6.273021221160889, "learning_rate": 1.5474501985817276e-06, "loss": 0.5518, "step": 7092 }, { "epoch": 1.03, "grad_norm": 5.327395915985107, "learning_rate": 1.5473177348048341e-06, "loss": 0.4805, "step": 7093 }, { "epoch": 1.03, "grad_norm": 6.297444820404053, "learning_rate": 1.547185257315784e-06, "loss": 0.483, "step": 7094 }, { "epoch": 1.03, "grad_norm": 6.150375843048096, "learning_rate": 1.5470527661178966e-06, "loss": 0.49, "step": 7095 }, { "epoch": 1.03, "grad_norm": 7.17393684387207, "learning_rate": 1.5469202612144904e-06, "loss": 0.6313, "step": 7096 }, { "epoch": 1.03, "grad_norm": 6.757619380950928, "learning_rate": 1.5467877426088864e-06, "loss": 0.5929, "step": 7097 }, { "epoch": 1.03, "grad_norm": 6.804197311401367, "learning_rate": 1.5466552103044034e-06, "loss": 0.6016, "step": 7098 }, { "epoch": 1.03, "grad_norm": 5.7301836013793945, "learning_rate": 1.5465226643043626e-06, "loss": 0.528, "step": 7099 }, { "epoch": 1.03, "grad_norm": 7.20330286026001, "learning_rate": 1.5463901046120845e-06, "loss": 0.6093, "step": 7100 }, { "epoch": 1.03, "grad_norm": 5.554492950439453, "learning_rate": 1.54625753123089e-06, "loss": 0.5688, "step": 7101 }, { "epoch": 1.03, "grad_norm": 6.813892364501953, "learning_rate": 1.5461249441641007e-06, "loss": 0.6202, "step": 7102 }, { "epoch": 1.03, "grad_norm": 5.651064872741699, "learning_rate": 1.5459923434150383e-06, "loss": 0.532, "step": 7103 }, { "epoch": 1.03, "grad_norm": 5.660669803619385, "learning_rate": 1.545859728987025e-06, "loss": 0.5478, "step": 7104 }, { "epoch": 1.03, "grad_norm": 5.962374687194824, "learning_rate": 1.5457271008833829e-06, "loss": 0.5161, "step": 7105 }, { "epoch": 1.03, "grad_norm": 6.6537275314331055, "learning_rate": 1.545594459107435e-06, "loss": 0.4807, "step": 7106 }, { "epoch": 1.03, "grad_norm": 6.226770401000977, "learning_rate": 1.5454618036625044e-06, "loss": 0.5976, "step": 7107 }, { "epoch": 1.03, "grad_norm": 5.8986310958862305, "learning_rate": 1.5453291345519147e-06, "loss": 0.5289, "step": 7108 }, { "epoch": 1.03, "grad_norm": 6.174992561340332, "learning_rate": 1.5451964517789895e-06, "loss": 0.5466, "step": 7109 }, { "epoch": 1.03, "grad_norm": 6.9043097496032715, "learning_rate": 1.545063755347053e-06, "loss": 0.5869, "step": 7110 }, { "epoch": 1.03, "grad_norm": 6.089147567749023, "learning_rate": 1.54493104525943e-06, "loss": 0.5076, "step": 7111 }, { "epoch": 1.03, "grad_norm": 6.078608989715576, "learning_rate": 1.5447983215194448e-06, "loss": 0.5463, "step": 7112 }, { "epoch": 1.03, "grad_norm": 5.877993106842041, "learning_rate": 1.5446655841304233e-06, "loss": 0.5173, "step": 7113 }, { "epoch": 1.03, "grad_norm": 6.38622522354126, "learning_rate": 1.54453283309569e-06, "loss": 0.5208, "step": 7114 }, { "epoch": 1.03, "grad_norm": 5.755248546600342, "learning_rate": 1.5444000684185717e-06, "loss": 0.5627, "step": 7115 }, { "epoch": 1.03, "grad_norm": 6.2212042808532715, "learning_rate": 1.5442672901023941e-06, "loss": 0.5773, "step": 7116 }, { "epoch": 1.03, "grad_norm": 6.5146660804748535, "learning_rate": 1.544134498150484e-06, "loss": 0.6239, "step": 7117 }, { "epoch": 1.03, "grad_norm": 6.100567817687988, "learning_rate": 1.544001692566168e-06, "loss": 0.6332, "step": 7118 }, { "epoch": 1.03, "grad_norm": 6.353460311889648, "learning_rate": 1.5438688733527738e-06, "loss": 0.5756, "step": 7119 }, { "epoch": 1.03, "grad_norm": 6.332047939300537, "learning_rate": 1.5437360405136285e-06, "loss": 0.5501, "step": 7120 }, { "epoch": 1.03, "grad_norm": 7.111423015594482, "learning_rate": 1.5436031940520602e-06, "loss": 0.4934, "step": 7121 }, { "epoch": 1.03, "grad_norm": 6.018584251403809, "learning_rate": 1.5434703339713973e-06, "loss": 0.5619, "step": 7122 }, { "epoch": 1.03, "grad_norm": 6.20269250869751, "learning_rate": 1.5433374602749681e-06, "loss": 0.513, "step": 7123 }, { "epoch": 1.03, "grad_norm": 5.90950870513916, "learning_rate": 1.5432045729661014e-06, "loss": 0.4454, "step": 7124 }, { "epoch": 1.03, "grad_norm": 5.742892265319824, "learning_rate": 1.5430716720481272e-06, "loss": 0.4841, "step": 7125 }, { "epoch": 1.03, "grad_norm": 7.004723072052002, "learning_rate": 1.5429387575243741e-06, "loss": 0.554, "step": 7126 }, { "epoch": 1.03, "grad_norm": 5.505630970001221, "learning_rate": 1.5428058293981732e-06, "loss": 0.4843, "step": 7127 }, { "epoch": 1.03, "grad_norm": 6.048225402832031, "learning_rate": 1.5426728876728538e-06, "loss": 0.553, "step": 7128 }, { "epoch": 1.03, "grad_norm": 6.960262775421143, "learning_rate": 1.542539932351747e-06, "loss": 0.5521, "step": 7129 }, { "epoch": 1.03, "grad_norm": 6.728884220123291, "learning_rate": 1.542406963438184e-06, "loss": 0.5355, "step": 7130 }, { "epoch": 1.03, "grad_norm": 5.983931541442871, "learning_rate": 1.5422739809354957e-06, "loss": 0.5964, "step": 7131 }, { "epoch": 1.03, "grad_norm": 5.752406597137451, "learning_rate": 1.5421409848470137e-06, "loss": 0.6222, "step": 7132 }, { "epoch": 1.03, "grad_norm": 5.858312606811523, "learning_rate": 1.5420079751760705e-06, "loss": 0.5105, "step": 7133 }, { "epoch": 1.04, "grad_norm": 6.889925956726074, "learning_rate": 1.541874951925998e-06, "loss": 0.5446, "step": 7134 }, { "epoch": 1.04, "grad_norm": 6.077577590942383, "learning_rate": 1.541741915100129e-06, "loss": 0.5529, "step": 7135 }, { "epoch": 1.04, "grad_norm": 6.324832916259766, "learning_rate": 1.5416088647017964e-06, "loss": 0.5635, "step": 7136 }, { "epoch": 1.04, "grad_norm": 5.5199360847473145, "learning_rate": 1.541475800734334e-06, "loss": 0.5408, "step": 7137 }, { "epoch": 1.04, "grad_norm": 6.733445167541504, "learning_rate": 1.541342723201075e-06, "loss": 0.5088, "step": 7138 }, { "epoch": 1.04, "grad_norm": 5.952751636505127, "learning_rate": 1.5412096321053541e-06, "loss": 0.5243, "step": 7139 }, { "epoch": 1.04, "grad_norm": 6.860622882843018, "learning_rate": 1.5410765274505049e-06, "loss": 0.6595, "step": 7140 }, { "epoch": 1.04, "grad_norm": 6.606691837310791, "learning_rate": 1.5409434092398625e-06, "loss": 0.5518, "step": 7141 }, { "epoch": 1.04, "grad_norm": 5.3261613845825195, "learning_rate": 1.5408102774767619e-06, "loss": 0.4913, "step": 7142 }, { "epoch": 1.04, "grad_norm": 6.560927867889404, "learning_rate": 1.5406771321645387e-06, "loss": 0.4962, "step": 7143 }, { "epoch": 1.04, "grad_norm": 6.198513507843018, "learning_rate": 1.5405439733065282e-06, "loss": 0.4853, "step": 7144 }, { "epoch": 1.04, "grad_norm": 5.536114692687988, "learning_rate": 1.540410800906067e-06, "loss": 0.4889, "step": 7145 }, { "epoch": 1.04, "grad_norm": 6.499027252197266, "learning_rate": 1.540277614966491e-06, "loss": 0.5763, "step": 7146 }, { "epoch": 1.04, "grad_norm": 6.735589981079102, "learning_rate": 1.5401444154911374e-06, "loss": 0.5972, "step": 7147 }, { "epoch": 1.04, "grad_norm": 5.77053689956665, "learning_rate": 1.540011202483343e-06, "loss": 0.4922, "step": 7148 }, { "epoch": 1.04, "grad_norm": 7.366692543029785, "learning_rate": 1.5398779759464455e-06, "loss": 0.6363, "step": 7149 }, { "epoch": 1.04, "grad_norm": 6.102091312408447, "learning_rate": 1.5397447358837825e-06, "loss": 0.561, "step": 7150 }, { "epoch": 1.04, "grad_norm": 6.446319580078125, "learning_rate": 1.5396114822986922e-06, "loss": 0.6201, "step": 7151 }, { "epoch": 1.04, "grad_norm": 7.106665134429932, "learning_rate": 1.5394782151945128e-06, "loss": 0.5053, "step": 7152 }, { "epoch": 1.04, "grad_norm": 5.518808841705322, "learning_rate": 1.5393449345745833e-06, "loss": 0.46, "step": 7153 }, { "epoch": 1.04, "grad_norm": 5.978188514709473, "learning_rate": 1.5392116404422426e-06, "loss": 0.551, "step": 7154 }, { "epoch": 1.04, "grad_norm": 6.005429267883301, "learning_rate": 1.5390783328008309e-06, "loss": 0.5078, "step": 7155 }, { "epoch": 1.04, "grad_norm": 5.282037734985352, "learning_rate": 1.5389450116536868e-06, "loss": 0.5336, "step": 7156 }, { "epoch": 1.04, "grad_norm": 5.878067970275879, "learning_rate": 1.5388116770041516e-06, "loss": 0.5069, "step": 7157 }, { "epoch": 1.04, "grad_norm": 6.3007330894470215, "learning_rate": 1.5386783288555655e-06, "loss": 0.5048, "step": 7158 }, { "epoch": 1.04, "grad_norm": 8.58658504486084, "learning_rate": 1.5385449672112688e-06, "loss": 0.6811, "step": 7159 }, { "epoch": 1.04, "grad_norm": 6.163122177124023, "learning_rate": 1.538411592074603e-06, "loss": 0.606, "step": 7160 }, { "epoch": 1.04, "grad_norm": 6.524459362030029, "learning_rate": 1.5382782034489096e-06, "loss": 0.571, "step": 7161 }, { "epoch": 1.04, "grad_norm": 5.545342922210693, "learning_rate": 1.5381448013375305e-06, "loss": 0.5764, "step": 7162 }, { "epoch": 1.04, "grad_norm": 6.497788906097412, "learning_rate": 1.5380113857438078e-06, "loss": 0.5989, "step": 7163 }, { "epoch": 1.04, "grad_norm": 6.779799461364746, "learning_rate": 1.537877956671084e-06, "loss": 0.5247, "step": 7164 }, { "epoch": 1.04, "grad_norm": 6.011943340301514, "learning_rate": 1.5377445141227016e-06, "loss": 0.5626, "step": 7165 }, { "epoch": 1.04, "grad_norm": 6.111469268798828, "learning_rate": 1.5376110581020046e-06, "loss": 0.5424, "step": 7166 }, { "epoch": 1.04, "grad_norm": 6.363041877746582, "learning_rate": 1.537477588612336e-06, "loss": 0.6327, "step": 7167 }, { "epoch": 1.04, "grad_norm": 6.161510467529297, "learning_rate": 1.5373441056570395e-06, "loss": 0.5632, "step": 7168 }, { "epoch": 1.04, "grad_norm": 5.683291435241699, "learning_rate": 1.5372106092394597e-06, "loss": 0.5274, "step": 7169 }, { "epoch": 1.04, "grad_norm": 5.76625919342041, "learning_rate": 1.537077099362941e-06, "loss": 0.5501, "step": 7170 }, { "epoch": 1.04, "grad_norm": 5.769199371337891, "learning_rate": 1.5369435760308283e-06, "loss": 0.5082, "step": 7171 }, { "epoch": 1.04, "grad_norm": 6.563579559326172, "learning_rate": 1.5368100392464664e-06, "loss": 0.6347, "step": 7172 }, { "epoch": 1.04, "grad_norm": 6.788320541381836, "learning_rate": 1.5366764890132014e-06, "loss": 0.6432, "step": 7173 }, { "epoch": 1.04, "grad_norm": 6.233804225921631, "learning_rate": 1.5365429253343792e-06, "loss": 0.5837, "step": 7174 }, { "epoch": 1.04, "grad_norm": 7.161197662353516, "learning_rate": 1.5364093482133456e-06, "loss": 0.6129, "step": 7175 }, { "epoch": 1.04, "grad_norm": 6.050364971160889, "learning_rate": 1.536275757653447e-06, "loss": 0.5529, "step": 7176 }, { "epoch": 1.04, "grad_norm": 7.016719818115234, "learning_rate": 1.5361421536580312e-06, "loss": 0.5913, "step": 7177 }, { "epoch": 1.04, "grad_norm": 6.459245681762695, "learning_rate": 1.5360085362304447e-06, "loss": 0.5364, "step": 7178 }, { "epoch": 1.04, "grad_norm": 5.6949782371521, "learning_rate": 1.5358749053740353e-06, "loss": 0.5787, "step": 7179 }, { "epoch": 1.04, "grad_norm": 6.212345600128174, "learning_rate": 1.5357412610921507e-06, "loss": 0.5785, "step": 7180 }, { "epoch": 1.04, "grad_norm": 5.7595438957214355, "learning_rate": 1.5356076033881392e-06, "loss": 0.4681, "step": 7181 }, { "epoch": 1.04, "grad_norm": 6.032371520996094, "learning_rate": 1.5354739322653498e-06, "loss": 0.598, "step": 7182 }, { "epoch": 1.04, "grad_norm": 6.1135358810424805, "learning_rate": 1.535340247727131e-06, "loss": 0.5099, "step": 7183 }, { "epoch": 1.04, "grad_norm": 6.828721046447754, "learning_rate": 1.535206549776832e-06, "loss": 0.5907, "step": 7184 }, { "epoch": 1.04, "grad_norm": 7.950847148895264, "learning_rate": 1.5350728384178025e-06, "loss": 0.5781, "step": 7185 }, { "epoch": 1.04, "grad_norm": 5.634886741638184, "learning_rate": 1.5349391136533926e-06, "loss": 0.5803, "step": 7186 }, { "epoch": 1.04, "grad_norm": 7.71990442276001, "learning_rate": 1.534805375486952e-06, "loss": 0.6326, "step": 7187 }, { "epoch": 1.04, "grad_norm": 6.6676344871521, "learning_rate": 1.5346716239218319e-06, "loss": 0.527, "step": 7188 }, { "epoch": 1.04, "grad_norm": 6.228033542633057, "learning_rate": 1.534537858961383e-06, "loss": 0.5265, "step": 7189 }, { "epoch": 1.04, "grad_norm": 5.869978427886963, "learning_rate": 1.5344040806089566e-06, "loss": 0.5222, "step": 7190 }, { "epoch": 1.04, "grad_norm": 6.323643684387207, "learning_rate": 1.5342702888679043e-06, "loss": 0.6386, "step": 7191 }, { "epoch": 1.04, "grad_norm": 5.619889259338379, "learning_rate": 1.534136483741578e-06, "loss": 0.5145, "step": 7192 }, { "epoch": 1.04, "grad_norm": 7.090618133544922, "learning_rate": 1.5340026652333301e-06, "loss": 0.5796, "step": 7193 }, { "epoch": 1.04, "grad_norm": 6.064306259155273, "learning_rate": 1.533868833346513e-06, "loss": 0.4858, "step": 7194 }, { "epoch": 1.04, "grad_norm": 6.02446985244751, "learning_rate": 1.5337349880844798e-06, "loss": 0.508, "step": 7195 }, { "epoch": 1.04, "grad_norm": 5.8728346824646, "learning_rate": 1.5336011294505836e-06, "loss": 0.5367, "step": 7196 }, { "epoch": 1.04, "grad_norm": 5.666460990905762, "learning_rate": 1.5334672574481782e-06, "loss": 0.5077, "step": 7197 }, { "epoch": 1.04, "grad_norm": 5.4913716316223145, "learning_rate": 1.5333333720806173e-06, "loss": 0.4864, "step": 7198 }, { "epoch": 1.04, "grad_norm": 6.118063449859619, "learning_rate": 1.5331994733512558e-06, "loss": 0.4833, "step": 7199 }, { "epoch": 1.04, "grad_norm": 6.74487829208374, "learning_rate": 1.5330655612634473e-06, "loss": 0.5485, "step": 7200 }, { "epoch": 1.04, "grad_norm": 5.631131172180176, "learning_rate": 1.5329316358205473e-06, "loss": 0.5424, "step": 7201 }, { "epoch": 1.04, "grad_norm": 5.517048358917236, "learning_rate": 1.5327976970259113e-06, "loss": 0.4857, "step": 7202 }, { "epoch": 1.05, "grad_norm": 6.679351806640625, "learning_rate": 1.5326637448828948e-06, "loss": 0.6267, "step": 7203 }, { "epoch": 1.05, "grad_norm": 6.097255229949951, "learning_rate": 1.5325297793948534e-06, "loss": 0.5761, "step": 7204 }, { "epoch": 1.05, "grad_norm": 6.508622646331787, "learning_rate": 1.5323958005651441e-06, "loss": 0.5397, "step": 7205 }, { "epoch": 1.05, "grad_norm": 7.002904415130615, "learning_rate": 1.5322618083971226e-06, "loss": 0.5117, "step": 7206 }, { "epoch": 1.05, "grad_norm": 5.865675449371338, "learning_rate": 1.5321278028941464e-06, "loss": 0.6147, "step": 7207 }, { "epoch": 1.05, "grad_norm": 6.205163955688477, "learning_rate": 1.5319937840595727e-06, "loss": 0.5268, "step": 7208 }, { "epoch": 1.05, "grad_norm": 6.42431640625, "learning_rate": 1.5318597518967594e-06, "loss": 0.5542, "step": 7209 }, { "epoch": 1.05, "grad_norm": 6.570440292358398, "learning_rate": 1.531725706409064e-06, "loss": 0.5329, "step": 7210 }, { "epoch": 1.05, "grad_norm": 5.85891056060791, "learning_rate": 1.531591647599845e-06, "loss": 0.5089, "step": 7211 }, { "epoch": 1.05, "grad_norm": 6.114213943481445, "learning_rate": 1.5314575754724612e-06, "loss": 0.5094, "step": 7212 }, { "epoch": 1.05, "grad_norm": 5.625772476196289, "learning_rate": 1.5313234900302712e-06, "loss": 0.5173, "step": 7213 }, { "epoch": 1.05, "grad_norm": 6.4566521644592285, "learning_rate": 1.531189391276634e-06, "loss": 0.5675, "step": 7214 }, { "epoch": 1.05, "grad_norm": 5.809758186340332, "learning_rate": 1.5310552792149106e-06, "loss": 0.5289, "step": 7215 }, { "epoch": 1.05, "grad_norm": 6.112748146057129, "learning_rate": 1.5309211538484594e-06, "loss": 0.5734, "step": 7216 }, { "epoch": 1.05, "grad_norm": 6.64121675491333, "learning_rate": 1.5307870151806415e-06, "loss": 0.595, "step": 7217 }, { "epoch": 1.05, "grad_norm": 6.001723766326904, "learning_rate": 1.5306528632148177e-06, "loss": 0.5557, "step": 7218 }, { "epoch": 1.05, "grad_norm": 6.766955375671387, "learning_rate": 1.5305186979543484e-06, "loss": 0.5293, "step": 7219 }, { "epoch": 1.05, "grad_norm": 6.134349822998047, "learning_rate": 1.5303845194025948e-06, "loss": 0.5411, "step": 7220 }, { "epoch": 1.05, "grad_norm": 7.373122215270996, "learning_rate": 1.5302503275629194e-06, "loss": 0.5091, "step": 7221 }, { "epoch": 1.05, "grad_norm": 5.911250591278076, "learning_rate": 1.5301161224386832e-06, "loss": 0.4736, "step": 7222 }, { "epoch": 1.05, "grad_norm": 6.095491886138916, "learning_rate": 1.5299819040332492e-06, "loss": 0.5563, "step": 7223 }, { "epoch": 1.05, "grad_norm": 7.086265563964844, "learning_rate": 1.5298476723499793e-06, "loss": 0.5962, "step": 7224 }, { "epoch": 1.05, "grad_norm": 6.449814319610596, "learning_rate": 1.5297134273922373e-06, "loss": 0.5317, "step": 7225 }, { "epoch": 1.05, "grad_norm": 5.985518932342529, "learning_rate": 1.5295791691633862e-06, "loss": 0.5622, "step": 7226 }, { "epoch": 1.05, "grad_norm": 6.038072109222412, "learning_rate": 1.529444897666789e-06, "loss": 0.5538, "step": 7227 }, { "epoch": 1.05, "grad_norm": 6.04874849319458, "learning_rate": 1.5293106129058103e-06, "loss": 0.5945, "step": 7228 }, { "epoch": 1.05, "grad_norm": 5.6148271560668945, "learning_rate": 1.5291763148838147e-06, "loss": 0.5815, "step": 7229 }, { "epoch": 1.05, "grad_norm": 6.639448642730713, "learning_rate": 1.5290420036041658e-06, "loss": 0.592, "step": 7230 }, { "epoch": 1.05, "grad_norm": 5.518164157867432, "learning_rate": 1.528907679070229e-06, "loss": 0.5137, "step": 7231 }, { "epoch": 1.05, "grad_norm": 5.8907246589660645, "learning_rate": 1.52877334128537e-06, "loss": 0.5226, "step": 7232 }, { "epoch": 1.05, "grad_norm": 5.952589511871338, "learning_rate": 1.5286389902529542e-06, "loss": 0.5731, "step": 7233 }, { "epoch": 1.05, "grad_norm": 6.2796173095703125, "learning_rate": 1.5285046259763474e-06, "loss": 0.5476, "step": 7234 }, { "epoch": 1.05, "grad_norm": 5.802241802215576, "learning_rate": 1.5283702484589158e-06, "loss": 0.5152, "step": 7235 }, { "epoch": 1.05, "grad_norm": 6.22622537612915, "learning_rate": 1.528235857704026e-06, "loss": 0.571, "step": 7236 }, { "epoch": 1.05, "grad_norm": 6.271844863891602, "learning_rate": 1.5281014537150454e-06, "loss": 0.5433, "step": 7237 }, { "epoch": 1.05, "grad_norm": 6.542660236358643, "learning_rate": 1.5279670364953407e-06, "loss": 0.6208, "step": 7238 }, { "epoch": 1.05, "grad_norm": 5.352424144744873, "learning_rate": 1.5278326060482797e-06, "loss": 0.5009, "step": 7239 }, { "epoch": 1.05, "grad_norm": 5.710073471069336, "learning_rate": 1.5276981623772307e-06, "loss": 0.5318, "step": 7240 }, { "epoch": 1.05, "grad_norm": 6.3759589195251465, "learning_rate": 1.5275637054855614e-06, "loss": 0.4743, "step": 7241 }, { "epoch": 1.05, "grad_norm": 5.546197414398193, "learning_rate": 1.5274292353766409e-06, "loss": 0.5031, "step": 7242 }, { "epoch": 1.05, "grad_norm": 5.467672824859619, "learning_rate": 1.5272947520538378e-06, "loss": 0.5306, "step": 7243 }, { "epoch": 1.05, "grad_norm": 5.906896591186523, "learning_rate": 1.5271602555205211e-06, "loss": 0.5142, "step": 7244 }, { "epoch": 1.05, "grad_norm": 6.932499408721924, "learning_rate": 1.5270257457800613e-06, "loss": 0.5455, "step": 7245 }, { "epoch": 1.05, "grad_norm": 5.8827080726623535, "learning_rate": 1.5268912228358274e-06, "loss": 0.4848, "step": 7246 }, { "epoch": 1.05, "grad_norm": 6.504764080047607, "learning_rate": 1.5267566866911903e-06, "loss": 0.674, "step": 7247 }, { "epoch": 1.05, "grad_norm": 5.927197456359863, "learning_rate": 1.52662213734952e-06, "loss": 0.5527, "step": 7248 }, { "epoch": 1.05, "grad_norm": 5.599185466766357, "learning_rate": 1.526487574814188e-06, "loss": 0.4846, "step": 7249 }, { "epoch": 1.05, "grad_norm": 7.261517524719238, "learning_rate": 1.526352999088565e-06, "loss": 0.6695, "step": 7250 }, { "epoch": 1.05, "grad_norm": 6.176372051239014, "learning_rate": 1.526218410176023e-06, "loss": 0.5793, "step": 7251 }, { "epoch": 1.05, "grad_norm": 6.606868267059326, "learning_rate": 1.5260838080799335e-06, "loss": 0.5605, "step": 7252 }, { "epoch": 1.05, "grad_norm": 6.072675704956055, "learning_rate": 1.5259491928036693e-06, "loss": 0.558, "step": 7253 }, { "epoch": 1.05, "grad_norm": 6.890838623046875, "learning_rate": 1.5258145643506026e-06, "loss": 0.6412, "step": 7254 }, { "epoch": 1.05, "grad_norm": 5.828546047210693, "learning_rate": 1.5256799227241063e-06, "loss": 0.497, "step": 7255 }, { "epoch": 1.05, "grad_norm": 6.099584102630615, "learning_rate": 1.5255452679275536e-06, "loss": 0.5118, "step": 7256 }, { "epoch": 1.05, "grad_norm": 5.843352317810059, "learning_rate": 1.5254105999643183e-06, "loss": 0.5303, "step": 7257 }, { "epoch": 1.05, "grad_norm": 5.596447467803955, "learning_rate": 1.525275918837774e-06, "loss": 0.5422, "step": 7258 }, { "epoch": 1.05, "grad_norm": 6.000482082366943, "learning_rate": 1.5251412245512948e-06, "loss": 0.5148, "step": 7259 }, { "epoch": 1.05, "grad_norm": 5.181859493255615, "learning_rate": 1.525006517108256e-06, "loss": 0.4969, "step": 7260 }, { "epoch": 1.05, "grad_norm": 6.382900714874268, "learning_rate": 1.5248717965120318e-06, "loss": 0.4958, "step": 7261 }, { "epoch": 1.05, "grad_norm": 6.0192437171936035, "learning_rate": 1.5247370627659975e-06, "loss": 0.5189, "step": 7262 }, { "epoch": 1.05, "grad_norm": 5.926279067993164, "learning_rate": 1.5246023158735286e-06, "loss": 0.5412, "step": 7263 }, { "epoch": 1.05, "grad_norm": 5.742205619812012, "learning_rate": 1.5244675558380012e-06, "loss": 0.5146, "step": 7264 }, { "epoch": 1.05, "grad_norm": 6.441017150878906, "learning_rate": 1.5243327826627913e-06, "loss": 0.5717, "step": 7265 }, { "epoch": 1.05, "grad_norm": 6.70530891418457, "learning_rate": 1.5241979963512752e-06, "loss": 0.5748, "step": 7266 }, { "epoch": 1.05, "grad_norm": 6.825161933898926, "learning_rate": 1.5240631969068306e-06, "loss": 0.5404, "step": 7267 }, { "epoch": 1.05, "grad_norm": 6.294622898101807, "learning_rate": 1.5239283843328337e-06, "loss": 0.564, "step": 7268 }, { "epoch": 1.05, "grad_norm": 6.785064697265625, "learning_rate": 1.5237935586326627e-06, "loss": 0.6054, "step": 7269 }, { "epoch": 1.05, "grad_norm": 6.082202434539795, "learning_rate": 1.5236587198096949e-06, "loss": 0.5447, "step": 7270 }, { "epoch": 1.05, "grad_norm": 5.0443501472473145, "learning_rate": 1.5235238678673088e-06, "loss": 0.4398, "step": 7271 }, { "epoch": 1.06, "grad_norm": 6.3180060386657715, "learning_rate": 1.5233890028088828e-06, "loss": 0.4601, "step": 7272 }, { "epoch": 1.06, "grad_norm": 6.150937557220459, "learning_rate": 1.5232541246377956e-06, "loss": 0.521, "step": 7273 }, { "epoch": 1.06, "grad_norm": 7.607059955596924, "learning_rate": 1.523119233357427e-06, "loss": 0.5277, "step": 7274 }, { "epoch": 1.06, "grad_norm": 6.516847133636475, "learning_rate": 1.5229843289711556e-06, "loss": 0.6139, "step": 7275 }, { "epoch": 1.06, "grad_norm": 5.738144874572754, "learning_rate": 1.5228494114823618e-06, "loss": 0.4719, "step": 7276 }, { "epoch": 1.06, "grad_norm": 6.553291320800781, "learning_rate": 1.5227144808944258e-06, "loss": 0.5282, "step": 7277 }, { "epoch": 1.06, "grad_norm": 5.719482421875, "learning_rate": 1.5225795372107272e-06, "loss": 0.5157, "step": 7278 }, { "epoch": 1.06, "grad_norm": 5.741973400115967, "learning_rate": 1.5224445804346475e-06, "loss": 0.5218, "step": 7279 }, { "epoch": 1.06, "grad_norm": 6.3902668952941895, "learning_rate": 1.5223096105695682e-06, "loss": 0.5996, "step": 7280 }, { "epoch": 1.06, "grad_norm": 6.488818645477295, "learning_rate": 1.5221746276188698e-06, "loss": 0.5472, "step": 7281 }, { "epoch": 1.06, "grad_norm": 6.357085704803467, "learning_rate": 1.5220396315859347e-06, "loss": 0.5128, "step": 7282 }, { "epoch": 1.06, "grad_norm": 6.537604331970215, "learning_rate": 1.5219046224741447e-06, "loss": 0.5013, "step": 7283 }, { "epoch": 1.06, "grad_norm": 6.199648380279541, "learning_rate": 1.5217696002868827e-06, "loss": 0.48, "step": 7284 }, { "epoch": 1.06, "grad_norm": 6.413943767547607, "learning_rate": 1.521634565027531e-06, "loss": 0.5611, "step": 7285 }, { "epoch": 1.06, "grad_norm": 6.2841973304748535, "learning_rate": 1.521499516699473e-06, "loss": 0.5491, "step": 7286 }, { "epoch": 1.06, "grad_norm": 7.619417667388916, "learning_rate": 1.5213644553060915e-06, "loss": 0.5403, "step": 7287 }, { "epoch": 1.06, "grad_norm": 6.402480125427246, "learning_rate": 1.5212293808507711e-06, "loss": 0.5718, "step": 7288 }, { "epoch": 1.06, "grad_norm": 5.977594375610352, "learning_rate": 1.5210942933368954e-06, "loss": 0.5497, "step": 7289 }, { "epoch": 1.06, "grad_norm": 6.352766513824463, "learning_rate": 1.5209591927678488e-06, "loss": 0.5952, "step": 7290 }, { "epoch": 1.06, "grad_norm": 6.6466217041015625, "learning_rate": 1.5208240791470162e-06, "loss": 0.5029, "step": 7291 }, { "epoch": 1.06, "grad_norm": 5.7449750900268555, "learning_rate": 1.5206889524777821e-06, "loss": 0.5382, "step": 7292 }, { "epoch": 1.06, "grad_norm": 6.0404462814331055, "learning_rate": 1.520553812763533e-06, "loss": 0.502, "step": 7293 }, { "epoch": 1.06, "grad_norm": 6.702091217041016, "learning_rate": 1.5204186600076536e-06, "loss": 0.6308, "step": 7294 }, { "epoch": 1.06, "grad_norm": 5.809391498565674, "learning_rate": 1.5202834942135304e-06, "loss": 0.5669, "step": 7295 }, { "epoch": 1.06, "grad_norm": 5.8786115646362305, "learning_rate": 1.5201483153845495e-06, "loss": 0.5107, "step": 7296 }, { "epoch": 1.06, "grad_norm": 6.550402641296387, "learning_rate": 1.5200131235240975e-06, "loss": 0.529, "step": 7297 }, { "epoch": 1.06, "grad_norm": 6.024526596069336, "learning_rate": 1.5198779186355618e-06, "loss": 0.5262, "step": 7298 }, { "epoch": 1.06, "grad_norm": 5.912257671356201, "learning_rate": 1.5197427007223297e-06, "loss": 0.5068, "step": 7299 }, { "epoch": 1.06, "grad_norm": 6.533030986785889, "learning_rate": 1.5196074697877888e-06, "loss": 0.58, "step": 7300 }, { "epoch": 1.06, "grad_norm": 6.909339904785156, "learning_rate": 1.5194722258353267e-06, "loss": 0.5836, "step": 7301 }, { "epoch": 1.06, "grad_norm": 6.003173828125, "learning_rate": 1.519336968868332e-06, "loss": 0.4454, "step": 7302 }, { "epoch": 1.06, "grad_norm": 5.740733623504639, "learning_rate": 1.519201698890194e-06, "loss": 0.5515, "step": 7303 }, { "epoch": 1.06, "grad_norm": 6.842934608459473, "learning_rate": 1.5190664159043004e-06, "loss": 0.6556, "step": 7304 }, { "epoch": 1.06, "grad_norm": 7.065102577209473, "learning_rate": 1.5189311199140416e-06, "loss": 0.5776, "step": 7305 }, { "epoch": 1.06, "grad_norm": 6.114382743835449, "learning_rate": 1.5187958109228062e-06, "loss": 0.6012, "step": 7306 }, { "epoch": 1.06, "grad_norm": 6.273312091827393, "learning_rate": 1.518660488933985e-06, "loss": 0.6567, "step": 7307 }, { "epoch": 1.06, "grad_norm": 6.833224773406982, "learning_rate": 1.5185251539509682e-06, "loss": 0.5884, "step": 7308 }, { "epoch": 1.06, "grad_norm": 5.791629791259766, "learning_rate": 1.5183898059771462e-06, "loss": 0.4985, "step": 7309 }, { "epoch": 1.06, "grad_norm": 6.5932416915893555, "learning_rate": 1.5182544450159096e-06, "loss": 0.5128, "step": 7310 }, { "epoch": 1.06, "grad_norm": 7.409097194671631, "learning_rate": 1.5181190710706501e-06, "loss": 0.5649, "step": 7311 }, { "epoch": 1.06, "grad_norm": 6.1662750244140625, "learning_rate": 1.517983684144759e-06, "loss": 0.5849, "step": 7312 }, { "epoch": 1.06, "grad_norm": 5.563567161560059, "learning_rate": 1.5178482842416286e-06, "loss": 0.4737, "step": 7313 }, { "epoch": 1.06, "grad_norm": 5.6754255294799805, "learning_rate": 1.5177128713646505e-06, "loss": 0.5331, "step": 7314 }, { "epoch": 1.06, "grad_norm": 6.081894874572754, "learning_rate": 1.517577445517218e-06, "loss": 0.5714, "step": 7315 }, { "epoch": 1.06, "grad_norm": 6.819153308868408, "learning_rate": 1.517442006702723e-06, "loss": 0.5842, "step": 7316 }, { "epoch": 1.06, "grad_norm": 6.019796371459961, "learning_rate": 1.51730655492456e-06, "loss": 0.5332, "step": 7317 }, { "epoch": 1.06, "grad_norm": 6.125685214996338, "learning_rate": 1.5171710901861213e-06, "loss": 0.5685, "step": 7318 }, { "epoch": 1.06, "grad_norm": 6.729629039764404, "learning_rate": 1.5170356124908012e-06, "loss": 0.4421, "step": 7319 }, { "epoch": 1.06, "grad_norm": 6.16766881942749, "learning_rate": 1.5169001218419941e-06, "loss": 0.5062, "step": 7320 }, { "epoch": 1.06, "grad_norm": 6.465593338012695, "learning_rate": 1.5167646182430942e-06, "loss": 0.5325, "step": 7321 }, { "epoch": 1.06, "grad_norm": 6.270765781402588, "learning_rate": 1.5166291016974971e-06, "loss": 0.5365, "step": 7322 }, { "epoch": 1.06, "grad_norm": 6.066649436950684, "learning_rate": 1.5164935722085965e-06, "loss": 0.5346, "step": 7323 }, { "epoch": 1.06, "grad_norm": 5.934194087982178, "learning_rate": 1.516358029779789e-06, "loss": 0.4637, "step": 7324 }, { "epoch": 1.06, "grad_norm": 5.769416332244873, "learning_rate": 1.5162224744144697e-06, "loss": 0.4875, "step": 7325 }, { "epoch": 1.06, "grad_norm": 6.06607723236084, "learning_rate": 1.5160869061160356e-06, "loss": 0.5871, "step": 7326 }, { "epoch": 1.06, "grad_norm": 6.685041427612305, "learning_rate": 1.5159513248878824e-06, "loss": 0.504, "step": 7327 }, { "epoch": 1.06, "grad_norm": 5.997219562530518, "learning_rate": 1.515815730733407e-06, "loss": 0.5212, "step": 7328 }, { "epoch": 1.06, "grad_norm": 6.399946212768555, "learning_rate": 1.5156801236560066e-06, "loss": 0.5586, "step": 7329 }, { "epoch": 1.06, "grad_norm": 5.906559944152832, "learning_rate": 1.5155445036590788e-06, "loss": 0.4988, "step": 7330 }, { "epoch": 1.06, "grad_norm": 6.5278401374816895, "learning_rate": 1.5154088707460208e-06, "loss": 0.551, "step": 7331 }, { "epoch": 1.06, "grad_norm": 6.2112507820129395, "learning_rate": 1.5152732249202314e-06, "loss": 0.5418, "step": 7332 }, { "epoch": 1.06, "grad_norm": 5.702336311340332, "learning_rate": 1.5151375661851083e-06, "loss": 0.5377, "step": 7333 }, { "epoch": 1.06, "grad_norm": 6.786070823669434, "learning_rate": 1.5150018945440505e-06, "loss": 0.4848, "step": 7334 }, { "epoch": 1.06, "grad_norm": 6.171247482299805, "learning_rate": 1.514866210000457e-06, "loss": 0.5632, "step": 7335 }, { "epoch": 1.06, "grad_norm": 6.9091315269470215, "learning_rate": 1.5147305125577274e-06, "loss": 0.5543, "step": 7336 }, { "epoch": 1.06, "grad_norm": 6.48356819152832, "learning_rate": 1.514594802219261e-06, "loss": 0.5693, "step": 7337 }, { "epoch": 1.06, "grad_norm": 7.135701656341553, "learning_rate": 1.5144590789884579e-06, "loss": 0.591, "step": 7338 }, { "epoch": 1.06, "grad_norm": 6.1756439208984375, "learning_rate": 1.5143233428687187e-06, "loss": 0.4734, "step": 7339 }, { "epoch": 1.07, "grad_norm": 5.312831878662109, "learning_rate": 1.5141875938634434e-06, "loss": 0.4971, "step": 7340 }, { "epoch": 1.07, "grad_norm": 5.691044330596924, "learning_rate": 1.5140518319760338e-06, "loss": 0.5099, "step": 7341 }, { "epoch": 1.07, "grad_norm": 6.680451393127441, "learning_rate": 1.5139160572098907e-06, "loss": 0.5511, "step": 7342 }, { "epoch": 1.07, "grad_norm": 5.68629789352417, "learning_rate": 1.5137802695684156e-06, "loss": 0.5053, "step": 7343 }, { "epoch": 1.07, "grad_norm": 6.60244083404541, "learning_rate": 1.513644469055011e-06, "loss": 0.5597, "step": 7344 }, { "epoch": 1.07, "grad_norm": 6.759899139404297, "learning_rate": 1.5135086556730788e-06, "loss": 0.6633, "step": 7345 }, { "epoch": 1.07, "grad_norm": 6.107605457305908, "learning_rate": 1.5133728294260211e-06, "loss": 0.5063, "step": 7346 }, { "epoch": 1.07, "grad_norm": 6.120129585266113, "learning_rate": 1.5132369903172417e-06, "loss": 0.5769, "step": 7347 }, { "epoch": 1.07, "grad_norm": 6.652952194213867, "learning_rate": 1.5131011383501436e-06, "loss": 0.6142, "step": 7348 }, { "epoch": 1.07, "grad_norm": 6.620185852050781, "learning_rate": 1.5129652735281298e-06, "loss": 0.6027, "step": 7349 }, { "epoch": 1.07, "grad_norm": 5.686830043792725, "learning_rate": 1.5128293958546047e-06, "loss": 0.4987, "step": 7350 }, { "epoch": 1.07, "grad_norm": 5.406515121459961, "learning_rate": 1.5126935053329724e-06, "loss": 0.509, "step": 7351 }, { "epoch": 1.07, "grad_norm": 5.9567646980285645, "learning_rate": 1.5125576019666376e-06, "loss": 0.5561, "step": 7352 }, { "epoch": 1.07, "grad_norm": 7.4006123542785645, "learning_rate": 1.5124216857590043e-06, "loss": 0.601, "step": 7353 }, { "epoch": 1.07, "grad_norm": 6.807982921600342, "learning_rate": 1.5122857567134788e-06, "loss": 0.5555, "step": 7354 }, { "epoch": 1.07, "grad_norm": 5.580843448638916, "learning_rate": 1.5121498148334656e-06, "loss": 0.52, "step": 7355 }, { "epoch": 1.07, "grad_norm": 6.721278667449951, "learning_rate": 1.512013860122371e-06, "loss": 0.5583, "step": 7356 }, { "epoch": 1.07, "grad_norm": 6.2582478523254395, "learning_rate": 1.5118778925836014e-06, "loss": 0.5127, "step": 7357 }, { "epoch": 1.07, "grad_norm": 5.88439416885376, "learning_rate": 1.5117419122205628e-06, "loss": 0.4787, "step": 7358 }, { "epoch": 1.07, "grad_norm": 6.140251159667969, "learning_rate": 1.5116059190366622e-06, "loss": 0.5914, "step": 7359 }, { "epoch": 1.07, "grad_norm": 7.27172327041626, "learning_rate": 1.5114699130353063e-06, "loss": 0.5588, "step": 7360 }, { "epoch": 1.07, "grad_norm": 5.611893177032471, "learning_rate": 1.511333894219903e-06, "loss": 0.5271, "step": 7361 }, { "epoch": 1.07, "grad_norm": 6.008410453796387, "learning_rate": 1.5111978625938596e-06, "loss": 0.4488, "step": 7362 }, { "epoch": 1.07, "grad_norm": 5.831282138824463, "learning_rate": 1.5110618181605843e-06, "loss": 0.5588, "step": 7363 }, { "epoch": 1.07, "grad_norm": 6.735928058624268, "learning_rate": 1.5109257609234858e-06, "loss": 0.5137, "step": 7364 }, { "epoch": 1.07, "grad_norm": 6.557182788848877, "learning_rate": 1.5107896908859728e-06, "loss": 0.5843, "step": 7365 }, { "epoch": 1.07, "grad_norm": 6.61542272567749, "learning_rate": 1.5106536080514535e-06, "loss": 0.5742, "step": 7366 }, { "epoch": 1.07, "grad_norm": 6.551069259643555, "learning_rate": 1.510517512423338e-06, "loss": 0.5331, "step": 7367 }, { "epoch": 1.07, "grad_norm": 6.340932846069336, "learning_rate": 1.5103814040050355e-06, "loss": 0.5987, "step": 7368 }, { "epoch": 1.07, "grad_norm": 6.321884632110596, "learning_rate": 1.5102452827999567e-06, "loss": 0.6043, "step": 7369 }, { "epoch": 1.07, "grad_norm": 6.076740264892578, "learning_rate": 1.5101091488115108e-06, "loss": 0.5977, "step": 7370 }, { "epoch": 1.07, "grad_norm": 6.371342182159424, "learning_rate": 1.5099730020431092e-06, "loss": 0.5286, "step": 7371 }, { "epoch": 1.07, "grad_norm": 7.0835280418396, "learning_rate": 1.5098368424981627e-06, "loss": 0.5382, "step": 7372 }, { "epoch": 1.07, "grad_norm": 6.245148658752441, "learning_rate": 1.5097006701800826e-06, "loss": 0.5964, "step": 7373 }, { "epoch": 1.07, "grad_norm": 6.541118621826172, "learning_rate": 1.5095644850922802e-06, "loss": 0.5546, "step": 7374 }, { "epoch": 1.07, "grad_norm": 6.23347806930542, "learning_rate": 1.5094282872381678e-06, "loss": 0.5484, "step": 7375 }, { "epoch": 1.07, "grad_norm": 6.32146692276001, "learning_rate": 1.5092920766211572e-06, "loss": 0.5003, "step": 7376 }, { "epoch": 1.07, "grad_norm": 6.37252950668335, "learning_rate": 1.5091558532446614e-06, "loss": 0.5287, "step": 7377 }, { "epoch": 1.07, "grad_norm": 5.894375324249268, "learning_rate": 1.5090196171120927e-06, "loss": 0.4986, "step": 7378 }, { "epoch": 1.07, "grad_norm": 6.687241554260254, "learning_rate": 1.508883368226865e-06, "loss": 0.5149, "step": 7379 }, { "epoch": 1.07, "grad_norm": 6.6516947746276855, "learning_rate": 1.508747106592391e-06, "loss": 0.616, "step": 7380 }, { "epoch": 1.07, "grad_norm": 6.504850387573242, "learning_rate": 1.508610832212085e-06, "loss": 0.5914, "step": 7381 }, { "epoch": 1.07, "grad_norm": 7.60616397857666, "learning_rate": 1.5084745450893608e-06, "loss": 0.5385, "step": 7382 }, { "epoch": 1.07, "grad_norm": 6.125839710235596, "learning_rate": 1.5083382452276333e-06, "loss": 0.5223, "step": 7383 }, { "epoch": 1.07, "grad_norm": 6.61430549621582, "learning_rate": 1.5082019326303169e-06, "loss": 0.5076, "step": 7384 }, { "epoch": 1.07, "grad_norm": 7.263178825378418, "learning_rate": 1.5080656073008266e-06, "loss": 0.5062, "step": 7385 }, { "epoch": 1.07, "grad_norm": 6.2481279373168945, "learning_rate": 1.5079292692425788e-06, "loss": 0.5419, "step": 7386 }, { "epoch": 1.07, "grad_norm": 6.5568742752075195, "learning_rate": 1.5077929184589878e-06, "loss": 0.5771, "step": 7387 }, { "epoch": 1.07, "grad_norm": 5.523181438446045, "learning_rate": 1.5076565549534702e-06, "loss": 0.5653, "step": 7388 }, { "epoch": 1.07, "grad_norm": 5.844807147979736, "learning_rate": 1.5075201787294426e-06, "loss": 0.5716, "step": 7389 }, { "epoch": 1.07, "grad_norm": 6.1882195472717285, "learning_rate": 1.5073837897903219e-06, "loss": 0.5145, "step": 7390 }, { "epoch": 1.07, "grad_norm": 6.299971580505371, "learning_rate": 1.5072473881395244e-06, "loss": 0.5833, "step": 7391 }, { "epoch": 1.07, "grad_norm": 6.248979568481445, "learning_rate": 1.5071109737804676e-06, "loss": 0.498, "step": 7392 }, { "epoch": 1.07, "grad_norm": 6.223628520965576, "learning_rate": 1.50697454671657e-06, "loss": 0.4988, "step": 7393 }, { "epoch": 1.07, "grad_norm": 5.6945929527282715, "learning_rate": 1.5068381069512484e-06, "loss": 0.4856, "step": 7394 }, { "epoch": 1.07, "grad_norm": 6.407414436340332, "learning_rate": 1.5067016544879217e-06, "loss": 0.507, "step": 7395 }, { "epoch": 1.07, "grad_norm": 5.314751148223877, "learning_rate": 1.5065651893300082e-06, "loss": 0.4531, "step": 7396 }, { "epoch": 1.07, "grad_norm": 6.166492462158203, "learning_rate": 1.506428711480927e-06, "loss": 0.5052, "step": 7397 }, { "epoch": 1.07, "grad_norm": 6.891802787780762, "learning_rate": 1.5062922209440974e-06, "loss": 0.5309, "step": 7398 }, { "epoch": 1.07, "grad_norm": 6.978551864624023, "learning_rate": 1.5061557177229389e-06, "loss": 0.5574, "step": 7399 }, { "epoch": 1.07, "grad_norm": 5.895935535430908, "learning_rate": 1.5060192018208712e-06, "loss": 0.4888, "step": 7400 }, { "epoch": 1.07, "grad_norm": 6.570816516876221, "learning_rate": 1.5058826732413146e-06, "loss": 0.5815, "step": 7401 }, { "epoch": 1.07, "grad_norm": 6.690134048461914, "learning_rate": 1.5057461319876893e-06, "loss": 0.5485, "step": 7402 }, { "epoch": 1.07, "grad_norm": 6.948081016540527, "learning_rate": 1.5056095780634168e-06, "loss": 0.5214, "step": 7403 }, { "epoch": 1.07, "grad_norm": 5.604112148284912, "learning_rate": 1.505473011471918e-06, "loss": 0.596, "step": 7404 }, { "epoch": 1.07, "grad_norm": 6.369122505187988, "learning_rate": 1.5053364322166137e-06, "loss": 0.5423, "step": 7405 }, { "epoch": 1.07, "grad_norm": 6.238114833831787, "learning_rate": 1.5051998403009267e-06, "loss": 0.5596, "step": 7406 }, { "epoch": 1.07, "grad_norm": 6.6334710121154785, "learning_rate": 1.5050632357282784e-06, "loss": 0.4534, "step": 7407 }, { "epoch": 1.07, "grad_norm": 6.1977763175964355, "learning_rate": 1.5049266185020913e-06, "loss": 0.5076, "step": 7408 }, { "epoch": 1.08, "grad_norm": 5.550224304199219, "learning_rate": 1.504789988625788e-06, "loss": 0.5164, "step": 7409 }, { "epoch": 1.08, "grad_norm": 6.455738067626953, "learning_rate": 1.504653346102792e-06, "loss": 0.5295, "step": 7410 }, { "epoch": 1.08, "grad_norm": 6.994697093963623, "learning_rate": 1.5045166909365264e-06, "loss": 0.6246, "step": 7411 }, { "epoch": 1.08, "grad_norm": 7.013139247894287, "learning_rate": 1.5043800231304147e-06, "loss": 0.562, "step": 7412 }, { "epoch": 1.08, "grad_norm": 6.630362033843994, "learning_rate": 1.5042433426878812e-06, "loss": 0.5373, "step": 7413 }, { "epoch": 1.08, "grad_norm": 6.402016639709473, "learning_rate": 1.5041066496123498e-06, "loss": 0.5152, "step": 7414 }, { "epoch": 1.08, "grad_norm": 6.238883972167969, "learning_rate": 1.5039699439072458e-06, "loss": 0.5344, "step": 7415 }, { "epoch": 1.08, "grad_norm": 7.575631141662598, "learning_rate": 1.503833225575993e-06, "loss": 0.5386, "step": 7416 }, { "epoch": 1.08, "grad_norm": 6.61262321472168, "learning_rate": 1.503696494622018e-06, "loss": 0.5455, "step": 7417 }, { "epoch": 1.08, "grad_norm": 6.235195159912109, "learning_rate": 1.5035597510487456e-06, "loss": 0.5323, "step": 7418 }, { "epoch": 1.08, "grad_norm": 6.54226016998291, "learning_rate": 1.5034229948596017e-06, "loss": 0.4782, "step": 7419 }, { "epoch": 1.08, "grad_norm": 6.027591705322266, "learning_rate": 1.5032862260580127e-06, "loss": 0.5341, "step": 7420 }, { "epoch": 1.08, "grad_norm": 6.4344353675842285, "learning_rate": 1.5031494446474056e-06, "loss": 0.5766, "step": 7421 }, { "epoch": 1.08, "grad_norm": 6.2020673751831055, "learning_rate": 1.5030126506312062e-06, "loss": 0.5835, "step": 7422 }, { "epoch": 1.08, "grad_norm": 6.37158727645874, "learning_rate": 1.502875844012842e-06, "loss": 0.5312, "step": 7423 }, { "epoch": 1.08, "grad_norm": 6.797161102294922, "learning_rate": 1.5027390247957406e-06, "loss": 0.6054, "step": 7424 }, { "epoch": 1.08, "grad_norm": 7.012781143188477, "learning_rate": 1.5026021929833302e-06, "loss": 0.525, "step": 7425 }, { "epoch": 1.08, "grad_norm": 6.936744213104248, "learning_rate": 1.502465348579038e-06, "loss": 0.5128, "step": 7426 }, { "epoch": 1.08, "grad_norm": 5.983521461486816, "learning_rate": 1.5023284915862935e-06, "loss": 0.487, "step": 7427 }, { "epoch": 1.08, "grad_norm": 7.9022650718688965, "learning_rate": 1.5021916220085244e-06, "loss": 0.5623, "step": 7428 }, { "epoch": 1.08, "grad_norm": 6.134127616882324, "learning_rate": 1.5020547398491603e-06, "loss": 0.5277, "step": 7429 }, { "epoch": 1.08, "grad_norm": 6.228829383850098, "learning_rate": 1.5019178451116305e-06, "loss": 0.5104, "step": 7430 }, { "epoch": 1.08, "grad_norm": 6.999906063079834, "learning_rate": 1.5017809377993646e-06, "loss": 0.5492, "step": 7431 }, { "epoch": 1.08, "grad_norm": 6.920705318450928, "learning_rate": 1.5016440179157926e-06, "loss": 0.6076, "step": 7432 }, { "epoch": 1.08, "grad_norm": 6.0189008712768555, "learning_rate": 1.5015070854643449e-06, "loss": 0.479, "step": 7433 }, { "epoch": 1.08, "grad_norm": 6.732174873352051, "learning_rate": 1.501370140448452e-06, "loss": 0.5606, "step": 7434 }, { "epoch": 1.08, "grad_norm": 6.035677909851074, "learning_rate": 1.501233182871545e-06, "loss": 0.5213, "step": 7435 }, { "epoch": 1.08, "grad_norm": 6.87822961807251, "learning_rate": 1.5010962127370545e-06, "loss": 0.5611, "step": 7436 }, { "epoch": 1.08, "grad_norm": 6.659493446350098, "learning_rate": 1.500959230048413e-06, "loss": 0.5992, "step": 7437 }, { "epoch": 1.08, "grad_norm": 6.379667282104492, "learning_rate": 1.500822234809052e-06, "loss": 0.5516, "step": 7438 }, { "epoch": 1.08, "grad_norm": 6.213195323944092, "learning_rate": 1.5006852270224036e-06, "loss": 0.5143, "step": 7439 }, { "epoch": 1.08, "grad_norm": 6.44244384765625, "learning_rate": 1.5005482066919002e-06, "loss": 0.534, "step": 7440 }, { "epoch": 1.08, "grad_norm": 5.564032077789307, "learning_rate": 1.500411173820975e-06, "loss": 0.5456, "step": 7441 }, { "epoch": 1.08, "grad_norm": 7.041964054107666, "learning_rate": 1.500274128413061e-06, "loss": 0.7034, "step": 7442 }, { "epoch": 1.08, "grad_norm": 6.414770603179932, "learning_rate": 1.5001370704715911e-06, "loss": 0.4872, "step": 7443 }, { "epoch": 1.08, "grad_norm": 5.754978656768799, "learning_rate": 1.5e-06, "loss": 0.5298, "step": 7444 }, { "epoch": 1.08, "grad_norm": 6.577816963195801, "learning_rate": 1.4998629170017213e-06, "loss": 0.5796, "step": 7445 }, { "epoch": 1.08, "grad_norm": 6.443281650543213, "learning_rate": 1.499725821480189e-06, "loss": 0.564, "step": 7446 }, { "epoch": 1.08, "grad_norm": 5.773868083953857, "learning_rate": 1.4995887134388385e-06, "loss": 0.5002, "step": 7447 }, { "epoch": 1.08, "grad_norm": 6.243585109710693, "learning_rate": 1.4994515928811046e-06, "loss": 0.5628, "step": 7448 }, { "epoch": 1.08, "grad_norm": 6.088831424713135, "learning_rate": 1.4993144598104223e-06, "loss": 0.6087, "step": 7449 }, { "epoch": 1.08, "grad_norm": 5.692991733551025, "learning_rate": 1.4991773142302278e-06, "loss": 0.4894, "step": 7450 }, { "epoch": 1.08, "grad_norm": 6.305367946624756, "learning_rate": 1.4990401561439566e-06, "loss": 0.5819, "step": 7451 }, { "epoch": 1.08, "grad_norm": 7.254780292510986, "learning_rate": 1.4989029855550453e-06, "loss": 0.5907, "step": 7452 }, { "epoch": 1.08, "grad_norm": 6.198267459869385, "learning_rate": 1.49876580246693e-06, "loss": 0.5305, "step": 7453 }, { "epoch": 1.08, "grad_norm": 5.6126508712768555, "learning_rate": 1.4986286068830483e-06, "loss": 0.5072, "step": 7454 }, { "epoch": 1.08, "grad_norm": 7.443038463592529, "learning_rate": 1.4984913988068368e-06, "loss": 0.6358, "step": 7455 }, { "epoch": 1.08, "grad_norm": 6.00661039352417, "learning_rate": 1.4983541782417334e-06, "loss": 0.504, "step": 7456 }, { "epoch": 1.08, "grad_norm": 6.265142917633057, "learning_rate": 1.4982169451911758e-06, "loss": 0.5308, "step": 7457 }, { "epoch": 1.08, "grad_norm": 5.188093185424805, "learning_rate": 1.4980796996586023e-06, "loss": 0.5232, "step": 7458 }, { "epoch": 1.08, "grad_norm": 5.905338764190674, "learning_rate": 1.4979424416474508e-06, "loss": 0.4784, "step": 7459 }, { "epoch": 1.08, "grad_norm": 6.353815078735352, "learning_rate": 1.4978051711611608e-06, "loss": 0.5365, "step": 7460 }, { "epoch": 1.08, "grad_norm": 6.685861110687256, "learning_rate": 1.497667888203171e-06, "loss": 0.5893, "step": 7461 }, { "epoch": 1.08, "grad_norm": 6.205343723297119, "learning_rate": 1.4975305927769211e-06, "loss": 0.5082, "step": 7462 }, { "epoch": 1.08, "grad_norm": 5.726376533508301, "learning_rate": 1.4973932848858504e-06, "loss": 0.5933, "step": 7463 }, { "epoch": 1.08, "grad_norm": 6.858853340148926, "learning_rate": 1.497255964533399e-06, "loss": 0.5837, "step": 7464 }, { "epoch": 1.08, "grad_norm": 7.090020656585693, "learning_rate": 1.4971186317230076e-06, "loss": 0.6598, "step": 7465 }, { "epoch": 1.08, "grad_norm": 6.685990333557129, "learning_rate": 1.4969812864581164e-06, "loss": 0.5148, "step": 7466 }, { "epoch": 1.08, "grad_norm": 6.427465438842773, "learning_rate": 1.4968439287421668e-06, "loss": 0.6125, "step": 7467 }, { "epoch": 1.08, "grad_norm": 6.348628520965576, "learning_rate": 1.4967065585785996e-06, "loss": 0.5895, "step": 7468 }, { "epoch": 1.08, "grad_norm": 5.986368656158447, "learning_rate": 1.496569175970857e-06, "loss": 0.4958, "step": 7469 }, { "epoch": 1.08, "grad_norm": 6.589692115783691, "learning_rate": 1.4964317809223804e-06, "loss": 0.576, "step": 7470 }, { "epoch": 1.08, "grad_norm": 7.105018615722656, "learning_rate": 1.4962943734366118e-06, "loss": 0.6104, "step": 7471 }, { "epoch": 1.08, "grad_norm": 6.659573078155518, "learning_rate": 1.4961569535169944e-06, "loss": 0.5491, "step": 7472 }, { "epoch": 1.08, "grad_norm": 6.565704345703125, "learning_rate": 1.4960195211669707e-06, "loss": 0.6109, "step": 7473 }, { "epoch": 1.08, "grad_norm": 6.682244300842285, "learning_rate": 1.4958820763899836e-06, "loss": 0.4902, "step": 7474 }, { "epoch": 1.08, "grad_norm": 5.909312725067139, "learning_rate": 1.495744619189477e-06, "loss": 0.4959, "step": 7475 }, { "epoch": 1.08, "grad_norm": 6.294835567474365, "learning_rate": 1.4956071495688943e-06, "loss": 0.5245, "step": 7476 }, { "epoch": 1.08, "grad_norm": 5.959265232086182, "learning_rate": 1.49546966753168e-06, "loss": 0.5569, "step": 7477 }, { "epoch": 1.09, "grad_norm": 6.487384796142578, "learning_rate": 1.495332173081278e-06, "loss": 0.6179, "step": 7478 }, { "epoch": 1.09, "grad_norm": 6.254613399505615, "learning_rate": 1.4951946662211334e-06, "loss": 0.51, "step": 7479 }, { "epoch": 1.09, "grad_norm": 5.841681480407715, "learning_rate": 1.4950571469546911e-06, "loss": 0.5331, "step": 7480 }, { "epoch": 1.09, "grad_norm": 5.908403396606445, "learning_rate": 1.4949196152853963e-06, "loss": 0.6255, "step": 7481 }, { "epoch": 1.09, "grad_norm": 5.901795387268066, "learning_rate": 1.4947820712166945e-06, "loss": 0.522, "step": 7482 }, { "epoch": 1.09, "grad_norm": 6.582183837890625, "learning_rate": 1.4946445147520322e-06, "loss": 0.5274, "step": 7483 }, { "epoch": 1.09, "grad_norm": 5.500977516174316, "learning_rate": 1.4945069458948549e-06, "loss": 0.5229, "step": 7484 }, { "epoch": 1.09, "grad_norm": 6.742741584777832, "learning_rate": 1.4943693646486098e-06, "loss": 0.5966, "step": 7485 }, { "epoch": 1.09, "grad_norm": 6.317999839782715, "learning_rate": 1.4942317710167433e-06, "loss": 0.566, "step": 7486 }, { "epoch": 1.09, "grad_norm": 7.305227756500244, "learning_rate": 1.4940941650027032e-06, "loss": 0.609, "step": 7487 }, { "epoch": 1.09, "grad_norm": 6.974672794342041, "learning_rate": 1.4939565466099365e-06, "loss": 0.5411, "step": 7488 }, { "epoch": 1.09, "grad_norm": 6.016592979431152, "learning_rate": 1.4938189158418907e-06, "loss": 0.5189, "step": 7489 }, { "epoch": 1.09, "grad_norm": 6.337794780731201, "learning_rate": 1.4936812727020152e-06, "loss": 0.5528, "step": 7490 }, { "epoch": 1.09, "grad_norm": 5.7474517822265625, "learning_rate": 1.4935436171937568e-06, "loss": 0.5486, "step": 7491 }, { "epoch": 1.09, "grad_norm": 6.897729396820068, "learning_rate": 1.4934059493205654e-06, "loss": 0.5867, "step": 7492 }, { "epoch": 1.09, "grad_norm": 5.745553970336914, "learning_rate": 1.4932682690858893e-06, "loss": 0.5563, "step": 7493 }, { "epoch": 1.09, "grad_norm": 5.683262348175049, "learning_rate": 1.4931305764931783e-06, "loss": 0.5229, "step": 7494 }, { "epoch": 1.09, "grad_norm": 5.865053653717041, "learning_rate": 1.4929928715458823e-06, "loss": 0.5318, "step": 7495 }, { "epoch": 1.09, "grad_norm": 6.3365159034729, "learning_rate": 1.4928551542474504e-06, "loss": 0.6169, "step": 7496 }, { "epoch": 1.09, "grad_norm": 6.027994155883789, "learning_rate": 1.492717424601334e-06, "loss": 0.5882, "step": 7497 }, { "epoch": 1.09, "grad_norm": 6.284191608428955, "learning_rate": 1.4925796826109828e-06, "loss": 0.549, "step": 7498 }, { "epoch": 1.09, "grad_norm": 7.9065070152282715, "learning_rate": 1.492441928279848e-06, "loss": 0.6487, "step": 7499 }, { "epoch": 1.09, "grad_norm": 6.692376613616943, "learning_rate": 1.4923041616113806e-06, "loss": 0.5874, "step": 7500 }, { "epoch": 1.09, "grad_norm": 6.215141296386719, "learning_rate": 1.4921663826090325e-06, "loss": 0.5615, "step": 7501 }, { "epoch": 1.09, "grad_norm": 7.095383167266846, "learning_rate": 1.4920285912762553e-06, "loss": 0.5622, "step": 7502 }, { "epoch": 1.09, "grad_norm": 6.367116928100586, "learning_rate": 1.4918907876165015e-06, "loss": 0.5581, "step": 7503 }, { "epoch": 1.09, "grad_norm": 6.733019828796387, "learning_rate": 1.491752971633223e-06, "loss": 0.5487, "step": 7504 }, { "epoch": 1.09, "grad_norm": 6.034603595733643, "learning_rate": 1.491615143329873e-06, "loss": 0.5498, "step": 7505 }, { "epoch": 1.09, "grad_norm": 6.259532928466797, "learning_rate": 1.491477302709904e-06, "loss": 0.5265, "step": 7506 }, { "epoch": 1.09, "grad_norm": 6.382843017578125, "learning_rate": 1.4913394497767702e-06, "loss": 0.4959, "step": 7507 }, { "epoch": 1.09, "grad_norm": 6.490556716918945, "learning_rate": 1.4912015845339245e-06, "loss": 0.5699, "step": 7508 }, { "epoch": 1.09, "grad_norm": 5.9040446281433105, "learning_rate": 1.4910637069848216e-06, "loss": 0.4603, "step": 7509 }, { "epoch": 1.09, "grad_norm": 6.70975399017334, "learning_rate": 1.4909258171329153e-06, "loss": 0.5707, "step": 7510 }, { "epoch": 1.09, "grad_norm": 6.195361614227295, "learning_rate": 1.4907879149816603e-06, "loss": 0.5188, "step": 7511 }, { "epoch": 1.09, "grad_norm": 6.1012725830078125, "learning_rate": 1.4906500005345113e-06, "loss": 0.505, "step": 7512 }, { "epoch": 1.09, "grad_norm": 7.112422466278076, "learning_rate": 1.4905120737949237e-06, "loss": 0.5264, "step": 7513 }, { "epoch": 1.09, "grad_norm": 6.387014865875244, "learning_rate": 1.4903741347663534e-06, "loss": 0.5389, "step": 7514 }, { "epoch": 1.09, "grad_norm": 6.293725967407227, "learning_rate": 1.4902361834522559e-06, "loss": 0.4835, "step": 7515 }, { "epoch": 1.09, "grad_norm": 6.327286243438721, "learning_rate": 1.4900982198560872e-06, "loss": 0.4624, "step": 7516 }, { "epoch": 1.09, "grad_norm": 5.805446624755859, "learning_rate": 1.489960243981304e-06, "loss": 0.5158, "step": 7517 }, { "epoch": 1.09, "grad_norm": 6.913002967834473, "learning_rate": 1.4898222558313631e-06, "loss": 0.4906, "step": 7518 }, { "epoch": 1.09, "grad_norm": 6.588164329528809, "learning_rate": 1.4896842554097213e-06, "loss": 0.6713, "step": 7519 }, { "epoch": 1.09, "grad_norm": 6.51630163192749, "learning_rate": 1.4895462427198363e-06, "loss": 0.5272, "step": 7520 }, { "epoch": 1.09, "grad_norm": 6.839371204376221, "learning_rate": 1.4894082177651655e-06, "loss": 0.5983, "step": 7521 }, { "epoch": 1.09, "grad_norm": 6.181161403656006, "learning_rate": 1.489270180549167e-06, "loss": 0.5349, "step": 7522 }, { "epoch": 1.09, "grad_norm": 5.469448566436768, "learning_rate": 1.4891321310752992e-06, "loss": 0.4997, "step": 7523 }, { "epoch": 1.09, "grad_norm": 7.5011773109436035, "learning_rate": 1.4889940693470203e-06, "loss": 0.6294, "step": 7524 }, { "epoch": 1.09, "grad_norm": 6.3460307121276855, "learning_rate": 1.48885599536779e-06, "loss": 0.5131, "step": 7525 }, { "epoch": 1.09, "grad_norm": 5.687887668609619, "learning_rate": 1.4887179091410669e-06, "loss": 0.5453, "step": 7526 }, { "epoch": 1.09, "grad_norm": 5.361422061920166, "learning_rate": 1.4885798106703102e-06, "loss": 0.5287, "step": 7527 }, { "epoch": 1.09, "grad_norm": 5.830379962921143, "learning_rate": 1.4884416999589806e-06, "loss": 0.5302, "step": 7528 }, { "epoch": 1.09, "grad_norm": 6.415863037109375, "learning_rate": 1.488303577010538e-06, "loss": 0.5996, "step": 7529 }, { "epoch": 1.09, "grad_norm": 6.384207248687744, "learning_rate": 1.4881654418284424e-06, "loss": 0.5617, "step": 7530 }, { "epoch": 1.09, "grad_norm": 6.4757080078125, "learning_rate": 1.4880272944161552e-06, "loss": 0.5511, "step": 7531 }, { "epoch": 1.09, "grad_norm": 6.146124839782715, "learning_rate": 1.4878891347771368e-06, "loss": 0.5807, "step": 7532 }, { "epoch": 1.09, "grad_norm": 6.800057411193848, "learning_rate": 1.487750962914849e-06, "loss": 0.5413, "step": 7533 }, { "epoch": 1.09, "grad_norm": 6.431886672973633, "learning_rate": 1.487612778832753e-06, "loss": 0.5097, "step": 7534 }, { "epoch": 1.09, "grad_norm": 5.784450054168701, "learning_rate": 1.4874745825343114e-06, "loss": 0.5246, "step": 7535 }, { "epoch": 1.09, "grad_norm": 6.0326385498046875, "learning_rate": 1.4873363740229863e-06, "loss": 0.5544, "step": 7536 }, { "epoch": 1.09, "grad_norm": 6.64306640625, "learning_rate": 1.4871981533022399e-06, "loss": 0.5651, "step": 7537 }, { "epoch": 1.09, "grad_norm": 6.140726089477539, "learning_rate": 1.4870599203755357e-06, "loss": 0.5461, "step": 7538 }, { "epoch": 1.09, "grad_norm": 5.585909366607666, "learning_rate": 1.4869216752463365e-06, "loss": 0.5142, "step": 7539 }, { "epoch": 1.09, "grad_norm": 6.856482982635498, "learning_rate": 1.4867834179181058e-06, "loss": 0.5611, "step": 7540 }, { "epoch": 1.09, "grad_norm": 6.669275283813477, "learning_rate": 1.4866451483943074e-06, "loss": 0.611, "step": 7541 }, { "epoch": 1.09, "grad_norm": 6.679006576538086, "learning_rate": 1.4865068666784058e-06, "loss": 0.5468, "step": 7542 }, { "epoch": 1.09, "grad_norm": 6.987709045410156, "learning_rate": 1.4863685727738648e-06, "loss": 0.587, "step": 7543 }, { "epoch": 1.09, "grad_norm": 6.148873329162598, "learning_rate": 1.4862302666841497e-06, "loss": 0.56, "step": 7544 }, { "epoch": 1.09, "grad_norm": 6.2743706703186035, "learning_rate": 1.4860919484127251e-06, "loss": 0.5844, "step": 7545 }, { "epoch": 1.09, "grad_norm": 5.978667736053467, "learning_rate": 1.485953617963057e-06, "loss": 0.4961, "step": 7546 }, { "epoch": 1.1, "grad_norm": 6.543502330780029, "learning_rate": 1.4858152753386102e-06, "loss": 0.5328, "step": 7547 }, { "epoch": 1.1, "grad_norm": 7.09163761138916, "learning_rate": 1.4856769205428513e-06, "loss": 0.5195, "step": 7548 }, { "epoch": 1.1, "grad_norm": 6.160355567932129, "learning_rate": 1.485538553579246e-06, "loss": 0.4792, "step": 7549 }, { "epoch": 1.1, "grad_norm": 6.796670436859131, "learning_rate": 1.4854001744512612e-06, "loss": 0.5589, "step": 7550 }, { "epoch": 1.1, "grad_norm": 6.0056867599487305, "learning_rate": 1.4852617831623639e-06, "loss": 0.545, "step": 7551 }, { "epoch": 1.1, "grad_norm": 6.147095203399658, "learning_rate": 1.485123379716021e-06, "loss": 0.5532, "step": 7552 }, { "epoch": 1.1, "grad_norm": 6.165410995483398, "learning_rate": 1.4849849641156999e-06, "loss": 0.4802, "step": 7553 }, { "epoch": 1.1, "grad_norm": 6.096724510192871, "learning_rate": 1.484846536364869e-06, "loss": 0.5204, "step": 7554 }, { "epoch": 1.1, "grad_norm": 5.478619575500488, "learning_rate": 1.4847080964669956e-06, "loss": 0.5137, "step": 7555 }, { "epoch": 1.1, "grad_norm": 6.421963691711426, "learning_rate": 1.4845696444255484e-06, "loss": 0.563, "step": 7556 }, { "epoch": 1.1, "grad_norm": 6.492281436920166, "learning_rate": 1.4844311802439963e-06, "loss": 0.6735, "step": 7557 }, { "epoch": 1.1, "grad_norm": 6.8171210289001465, "learning_rate": 1.4842927039258077e-06, "loss": 0.5769, "step": 7558 }, { "epoch": 1.1, "grad_norm": 6.903956413269043, "learning_rate": 1.4841542154744527e-06, "loss": 0.5087, "step": 7559 }, { "epoch": 1.1, "grad_norm": 6.209010601043701, "learning_rate": 1.4840157148934005e-06, "loss": 0.5836, "step": 7560 }, { "epoch": 1.1, "grad_norm": 6.428390026092529, "learning_rate": 1.4838772021861209e-06, "loss": 0.535, "step": 7561 }, { "epoch": 1.1, "grad_norm": 6.422860622406006, "learning_rate": 1.483738677356084e-06, "loss": 0.4984, "step": 7562 }, { "epoch": 1.1, "grad_norm": 6.035418510437012, "learning_rate": 1.4836001404067606e-06, "loss": 0.5999, "step": 7563 }, { "epoch": 1.1, "grad_norm": 6.31438684463501, "learning_rate": 1.4834615913416215e-06, "loss": 0.5336, "step": 7564 }, { "epoch": 1.1, "grad_norm": 6.678337097167969, "learning_rate": 1.483323030164138e-06, "loss": 0.5014, "step": 7565 }, { "epoch": 1.1, "grad_norm": 5.951401710510254, "learning_rate": 1.4831844568777813e-06, "loss": 0.5098, "step": 7566 }, { "epoch": 1.1, "grad_norm": 6.055258274078369, "learning_rate": 1.483045871486023e-06, "loss": 0.5827, "step": 7567 }, { "epoch": 1.1, "grad_norm": 6.610684871673584, "learning_rate": 1.482907273992335e-06, "loss": 0.4788, "step": 7568 }, { "epoch": 1.1, "grad_norm": 6.191213607788086, "learning_rate": 1.4827686644001903e-06, "loss": 0.5209, "step": 7569 }, { "epoch": 1.1, "grad_norm": 6.004494667053223, "learning_rate": 1.4826300427130608e-06, "loss": 0.5526, "step": 7570 }, { "epoch": 1.1, "grad_norm": 7.553361415863037, "learning_rate": 1.4824914089344198e-06, "loss": 0.581, "step": 7571 }, { "epoch": 1.1, "grad_norm": 6.552201271057129, "learning_rate": 1.4823527630677407e-06, "loss": 0.5972, "step": 7572 }, { "epoch": 1.1, "grad_norm": 5.878220081329346, "learning_rate": 1.4822141051164969e-06, "loss": 0.5292, "step": 7573 }, { "epoch": 1.1, "grad_norm": 6.493431091308594, "learning_rate": 1.482075435084162e-06, "loss": 0.6041, "step": 7574 }, { "epoch": 1.1, "grad_norm": 7.449909687042236, "learning_rate": 1.4819367529742106e-06, "loss": 0.5652, "step": 7575 }, { "epoch": 1.1, "grad_norm": 6.6071553230285645, "learning_rate": 1.4817980587901167e-06, "loss": 0.5928, "step": 7576 }, { "epoch": 1.1, "grad_norm": 6.339420795440674, "learning_rate": 1.4816593525353554e-06, "loss": 0.5487, "step": 7577 }, { "epoch": 1.1, "grad_norm": 6.457320213317871, "learning_rate": 1.4815206342134015e-06, "loss": 0.6035, "step": 7578 }, { "epoch": 1.1, "grad_norm": 6.131031513214111, "learning_rate": 1.4813819038277306e-06, "loss": 0.6458, "step": 7579 }, { "epoch": 1.1, "grad_norm": 6.3928046226501465, "learning_rate": 1.4812431613818182e-06, "loss": 0.4921, "step": 7580 }, { "epoch": 1.1, "grad_norm": 6.429529666900635, "learning_rate": 1.4811044068791404e-06, "loss": 0.5667, "step": 7581 }, { "epoch": 1.1, "grad_norm": 7.209794044494629, "learning_rate": 1.4809656403231733e-06, "loss": 0.631, "step": 7582 }, { "epoch": 1.1, "grad_norm": 7.365606784820557, "learning_rate": 1.4808268617173935e-06, "loss": 0.5443, "step": 7583 }, { "epoch": 1.1, "grad_norm": 7.058559894561768, "learning_rate": 1.4806880710652782e-06, "loss": 0.5811, "step": 7584 }, { "epoch": 1.1, "grad_norm": 6.736708164215088, "learning_rate": 1.4805492683703042e-06, "loss": 0.5816, "step": 7585 }, { "epoch": 1.1, "grad_norm": 5.502633571624756, "learning_rate": 1.480410453635949e-06, "loss": 0.5292, "step": 7586 }, { "epoch": 1.1, "grad_norm": 5.909699440002441, "learning_rate": 1.4802716268656908e-06, "loss": 0.5013, "step": 7587 }, { "epoch": 1.1, "grad_norm": 7.011367321014404, "learning_rate": 1.480132788063007e-06, "loss": 0.5634, "step": 7588 }, { "epoch": 1.1, "grad_norm": 6.676894664764404, "learning_rate": 1.4799939372313765e-06, "loss": 0.5494, "step": 7589 }, { "epoch": 1.1, "grad_norm": 6.681716442108154, "learning_rate": 1.4798550743742777e-06, "loss": 0.5405, "step": 7590 }, { "epoch": 1.1, "grad_norm": 6.141607284545898, "learning_rate": 1.4797161994951899e-06, "loss": 0.562, "step": 7591 }, { "epoch": 1.1, "grad_norm": 5.88098669052124, "learning_rate": 1.479577312597592e-06, "loss": 0.5121, "step": 7592 }, { "epoch": 1.1, "grad_norm": 6.438671588897705, "learning_rate": 1.4794384136849637e-06, "loss": 0.5708, "step": 7593 }, { "epoch": 1.1, "grad_norm": 6.420588970184326, "learning_rate": 1.4792995027607852e-06, "loss": 0.5334, "step": 7594 }, { "epoch": 1.1, "grad_norm": 6.713854789733887, "learning_rate": 1.4791605798285363e-06, "loss": 0.5124, "step": 7595 }, { "epoch": 1.1, "grad_norm": 6.497354984283447, "learning_rate": 1.4790216448916975e-06, "loss": 0.6093, "step": 7596 }, { "epoch": 1.1, "grad_norm": 5.861385822296143, "learning_rate": 1.47888269795375e-06, "loss": 0.5669, "step": 7597 }, { "epoch": 1.1, "grad_norm": 6.73964786529541, "learning_rate": 1.4787437390181742e-06, "loss": 0.5092, "step": 7598 }, { "epoch": 1.1, "grad_norm": 7.544081211090088, "learning_rate": 1.4786047680884521e-06, "loss": 0.6468, "step": 7599 }, { "epoch": 1.1, "grad_norm": 6.021753787994385, "learning_rate": 1.478465785168065e-06, "loss": 0.5361, "step": 7600 }, { "epoch": 1.1, "grad_norm": 6.159604072570801, "learning_rate": 1.4783267902604958e-06, "loss": 0.5281, "step": 7601 }, { "epoch": 1.1, "grad_norm": 6.538064956665039, "learning_rate": 1.4781877833692254e-06, "loss": 0.5214, "step": 7602 }, { "epoch": 1.1, "grad_norm": 6.215307235717773, "learning_rate": 1.478048764497737e-06, "loss": 0.5423, "step": 7603 }, { "epoch": 1.1, "grad_norm": 6.551434516906738, "learning_rate": 1.4779097336495139e-06, "loss": 0.5225, "step": 7604 }, { "epoch": 1.1, "grad_norm": 5.712653636932373, "learning_rate": 1.4777706908280387e-06, "loss": 0.5234, "step": 7605 }, { "epoch": 1.1, "grad_norm": 6.79163932800293, "learning_rate": 1.4776316360367951e-06, "loss": 0.5437, "step": 7606 }, { "epoch": 1.1, "grad_norm": 6.619131565093994, "learning_rate": 1.4774925692792673e-06, "loss": 0.5707, "step": 7607 }, { "epoch": 1.1, "grad_norm": 5.459439754486084, "learning_rate": 1.4773534905589387e-06, "loss": 0.539, "step": 7608 }, { "epoch": 1.1, "grad_norm": 6.857858657836914, "learning_rate": 1.4772143998792942e-06, "loss": 0.6126, "step": 7609 }, { "epoch": 1.1, "grad_norm": 6.698767185211182, "learning_rate": 1.477075297243818e-06, "loss": 0.5219, "step": 7610 }, { "epoch": 1.1, "grad_norm": 5.727345943450928, "learning_rate": 1.4769361826559958e-06, "loss": 0.5339, "step": 7611 }, { "epoch": 1.1, "grad_norm": 6.1572418212890625, "learning_rate": 1.4767970561193121e-06, "loss": 0.4965, "step": 7612 }, { "epoch": 1.1, "grad_norm": 6.396202087402344, "learning_rate": 1.476657917637253e-06, "loss": 0.5599, "step": 7613 }, { "epoch": 1.1, "grad_norm": 6.075944423675537, "learning_rate": 1.4765187672133045e-06, "loss": 0.5039, "step": 7614 }, { "epoch": 1.1, "grad_norm": 5.764337062835693, "learning_rate": 1.4763796048509525e-06, "loss": 0.5047, "step": 7615 }, { "epoch": 1.11, "grad_norm": 6.594216346740723, "learning_rate": 1.4762404305536833e-06, "loss": 0.5157, "step": 7616 }, { "epoch": 1.11, "grad_norm": 7.412357330322266, "learning_rate": 1.476101244324984e-06, "loss": 0.5788, "step": 7617 }, { "epoch": 1.11, "grad_norm": 6.032526969909668, "learning_rate": 1.4759620461683418e-06, "loss": 0.5712, "step": 7618 }, { "epoch": 1.11, "grad_norm": 6.391642093658447, "learning_rate": 1.4758228360872435e-06, "loss": 0.5589, "step": 7619 }, { "epoch": 1.11, "grad_norm": 6.359813213348389, "learning_rate": 1.4756836140851774e-06, "loss": 0.5381, "step": 7620 }, { "epoch": 1.11, "grad_norm": 6.133072376251221, "learning_rate": 1.4755443801656313e-06, "loss": 0.578, "step": 7621 }, { "epoch": 1.11, "grad_norm": 6.687066078186035, "learning_rate": 1.4754051343320938e-06, "loss": 0.5022, "step": 7622 }, { "epoch": 1.11, "grad_norm": 5.830723285675049, "learning_rate": 1.4752658765880526e-06, "loss": 0.4914, "step": 7623 }, { "epoch": 1.11, "grad_norm": 5.786718368530273, "learning_rate": 1.475126606936997e-06, "loss": 0.4843, "step": 7624 }, { "epoch": 1.11, "grad_norm": 6.052911758422852, "learning_rate": 1.4749873253824165e-06, "loss": 0.5233, "step": 7625 }, { "epoch": 1.11, "grad_norm": 6.514512538909912, "learning_rate": 1.4748480319278002e-06, "loss": 0.5829, "step": 7626 }, { "epoch": 1.11, "grad_norm": 6.588010311126709, "learning_rate": 1.474708726576638e-06, "loss": 0.559, "step": 7627 }, { "epoch": 1.11, "grad_norm": 6.83712911605835, "learning_rate": 1.4745694093324201e-06, "loss": 0.5288, "step": 7628 }, { "epoch": 1.11, "grad_norm": 6.5991034507751465, "learning_rate": 1.474430080198637e-06, "loss": 0.6726, "step": 7629 }, { "epoch": 1.11, "grad_norm": 7.392187118530273, "learning_rate": 1.474290739178779e-06, "loss": 0.6106, "step": 7630 }, { "epoch": 1.11, "grad_norm": 6.660914421081543, "learning_rate": 1.4741513862763367e-06, "loss": 0.555, "step": 7631 }, { "epoch": 1.11, "grad_norm": 6.60892915725708, "learning_rate": 1.474012021494802e-06, "loss": 0.5305, "step": 7632 }, { "epoch": 1.11, "grad_norm": 6.198826313018799, "learning_rate": 1.4738726448376663e-06, "loss": 0.5088, "step": 7633 }, { "epoch": 1.11, "grad_norm": 6.103645324707031, "learning_rate": 1.4737332563084215e-06, "loss": 0.5761, "step": 7634 }, { "epoch": 1.11, "grad_norm": 6.204824447631836, "learning_rate": 1.47359385591056e-06, "loss": 0.5929, "step": 7635 }, { "epoch": 1.11, "grad_norm": 6.294425010681152, "learning_rate": 1.4734544436475738e-06, "loss": 0.5244, "step": 7636 }, { "epoch": 1.11, "grad_norm": 5.999802112579346, "learning_rate": 1.4733150195229555e-06, "loss": 0.5374, "step": 7637 }, { "epoch": 1.11, "grad_norm": 5.777548313140869, "learning_rate": 1.4731755835401986e-06, "loss": 0.4841, "step": 7638 }, { "epoch": 1.11, "grad_norm": 6.552952766418457, "learning_rate": 1.4730361357027966e-06, "loss": 0.6024, "step": 7639 }, { "epoch": 1.11, "grad_norm": 6.36984395980835, "learning_rate": 1.4728966760142424e-06, "loss": 0.5207, "step": 7640 }, { "epoch": 1.11, "grad_norm": 6.5555620193481445, "learning_rate": 1.4727572044780305e-06, "loss": 0.4663, "step": 7641 }, { "epoch": 1.11, "grad_norm": 5.74206018447876, "learning_rate": 1.4726177210976552e-06, "loss": 0.5139, "step": 7642 }, { "epoch": 1.11, "grad_norm": 6.802550792694092, "learning_rate": 1.4724782258766105e-06, "loss": 0.5659, "step": 7643 }, { "epoch": 1.11, "grad_norm": 6.325042247772217, "learning_rate": 1.4723387188183915e-06, "loss": 0.5131, "step": 7644 }, { "epoch": 1.11, "grad_norm": 5.976116180419922, "learning_rate": 1.4721991999264938e-06, "loss": 0.5088, "step": 7645 }, { "epoch": 1.11, "grad_norm": 6.564828395843506, "learning_rate": 1.4720596692044121e-06, "loss": 0.5578, "step": 7646 }, { "epoch": 1.11, "grad_norm": 6.169673442840576, "learning_rate": 1.4719201266556422e-06, "loss": 0.5036, "step": 7647 }, { "epoch": 1.11, "grad_norm": 6.259115219116211, "learning_rate": 1.4717805722836807e-06, "loss": 0.5699, "step": 7648 }, { "epoch": 1.11, "grad_norm": 6.740745544433594, "learning_rate": 1.4716410060920233e-06, "loss": 0.5398, "step": 7649 }, { "epoch": 1.11, "grad_norm": 6.561898231506348, "learning_rate": 1.471501428084167e-06, "loss": 0.5412, "step": 7650 }, { "epoch": 1.11, "grad_norm": 5.9369401931762695, "learning_rate": 1.4713618382636086e-06, "loss": 0.596, "step": 7651 }, { "epoch": 1.11, "grad_norm": 6.939867973327637, "learning_rate": 1.4712222366338449e-06, "loss": 0.6005, "step": 7652 }, { "epoch": 1.11, "grad_norm": 5.447179317474365, "learning_rate": 1.471082623198374e-06, "loss": 0.5048, "step": 7653 }, { "epoch": 1.11, "grad_norm": 6.64789342880249, "learning_rate": 1.470942997960693e-06, "loss": 0.6723, "step": 7654 }, { "epoch": 1.11, "grad_norm": 6.398850917816162, "learning_rate": 1.4708033609243006e-06, "loss": 0.5974, "step": 7655 }, { "epoch": 1.11, "grad_norm": 5.957746982574463, "learning_rate": 1.4706637120926953e-06, "loss": 0.5433, "step": 7656 }, { "epoch": 1.11, "grad_norm": 6.2271904945373535, "learning_rate": 1.4705240514693753e-06, "loss": 0.52, "step": 7657 }, { "epoch": 1.11, "grad_norm": 6.602356433868408, "learning_rate": 1.4703843790578393e-06, "loss": 0.5655, "step": 7658 }, { "epoch": 1.11, "grad_norm": 6.4668288230896, "learning_rate": 1.4702446948615871e-06, "loss": 0.7259, "step": 7659 }, { "epoch": 1.11, "grad_norm": 6.097995758056641, "learning_rate": 1.4701049988841186e-06, "loss": 0.5889, "step": 7660 }, { "epoch": 1.11, "grad_norm": 6.531991958618164, "learning_rate": 1.4699652911289327e-06, "loss": 0.5191, "step": 7661 }, { "epoch": 1.11, "grad_norm": 6.712985038757324, "learning_rate": 1.4698255715995303e-06, "loss": 0.5389, "step": 7662 }, { "epoch": 1.11, "grad_norm": 5.374711990356445, "learning_rate": 1.4696858402994117e-06, "loss": 0.4724, "step": 7663 }, { "epoch": 1.11, "grad_norm": 6.848911762237549, "learning_rate": 1.4695460972320772e-06, "loss": 0.5432, "step": 7664 }, { "epoch": 1.11, "grad_norm": 6.043607234954834, "learning_rate": 1.4694063424010284e-06, "loss": 0.5356, "step": 7665 }, { "epoch": 1.11, "grad_norm": 5.604825496673584, "learning_rate": 1.4692665758097663e-06, "loss": 0.4844, "step": 7666 }, { "epoch": 1.11, "grad_norm": 5.986019134521484, "learning_rate": 1.4691267974617928e-06, "loss": 0.5432, "step": 7667 }, { "epoch": 1.11, "grad_norm": 6.835958480834961, "learning_rate": 1.4689870073606093e-06, "loss": 0.5643, "step": 7668 }, { "epoch": 1.11, "grad_norm": 6.036577224731445, "learning_rate": 1.4688472055097184e-06, "loss": 0.5803, "step": 7669 }, { "epoch": 1.11, "grad_norm": 7.072856426239014, "learning_rate": 1.468707391912623e-06, "loss": 0.6275, "step": 7670 }, { "epoch": 1.11, "grad_norm": 6.246966361999512, "learning_rate": 1.4685675665728252e-06, "loss": 0.5273, "step": 7671 }, { "epoch": 1.11, "grad_norm": 6.443059921264648, "learning_rate": 1.4684277294938281e-06, "loss": 0.5563, "step": 7672 }, { "epoch": 1.11, "grad_norm": 5.3688764572143555, "learning_rate": 1.4682878806791356e-06, "loss": 0.4835, "step": 7673 }, { "epoch": 1.11, "grad_norm": 6.329362392425537, "learning_rate": 1.4681480201322512e-06, "loss": 0.5697, "step": 7674 }, { "epoch": 1.11, "grad_norm": 7.648838520050049, "learning_rate": 1.4680081478566785e-06, "loss": 0.5875, "step": 7675 }, { "epoch": 1.11, "grad_norm": 5.9574713706970215, "learning_rate": 1.4678682638559225e-06, "loss": 0.5274, "step": 7676 }, { "epoch": 1.11, "grad_norm": 6.251524448394775, "learning_rate": 1.467728368133487e-06, "loss": 0.5249, "step": 7677 }, { "epoch": 1.11, "grad_norm": 6.143830299377441, "learning_rate": 1.4675884606928776e-06, "loss": 0.5225, "step": 7678 }, { "epoch": 1.11, "grad_norm": 6.092531204223633, "learning_rate": 1.467448541537599e-06, "loss": 0.5478, "step": 7679 }, { "epoch": 1.11, "grad_norm": 5.9319233894348145, "learning_rate": 1.4673086106711565e-06, "loss": 0.5348, "step": 7680 }, { "epoch": 1.11, "grad_norm": 5.881387233734131, "learning_rate": 1.4671686680970562e-06, "loss": 0.5361, "step": 7681 }, { "epoch": 1.11, "grad_norm": 7.648097515106201, "learning_rate": 1.4670287138188038e-06, "loss": 0.5928, "step": 7682 }, { "epoch": 1.11, "grad_norm": 6.626128196716309, "learning_rate": 1.4668887478399058e-06, "loss": 0.5022, "step": 7683 }, { "epoch": 1.11, "grad_norm": 7.178522109985352, "learning_rate": 1.466748770163869e-06, "loss": 0.5918, "step": 7684 }, { "epoch": 1.12, "grad_norm": 6.158554553985596, "learning_rate": 1.4666087807942003e-06, "loss": 0.5071, "step": 7685 }, { "epoch": 1.12, "grad_norm": 7.0438642501831055, "learning_rate": 1.4664687797344064e-06, "loss": 0.5681, "step": 7686 }, { "epoch": 1.12, "grad_norm": 6.438523769378662, "learning_rate": 1.4663287669879953e-06, "loss": 0.4921, "step": 7687 }, { "epoch": 1.12, "grad_norm": 6.825774669647217, "learning_rate": 1.4661887425584747e-06, "loss": 0.5612, "step": 7688 }, { "epoch": 1.12, "grad_norm": 6.719324588775635, "learning_rate": 1.4660487064493526e-06, "loss": 0.5381, "step": 7689 }, { "epoch": 1.12, "grad_norm": 5.9524970054626465, "learning_rate": 1.4659086586641374e-06, "loss": 0.5506, "step": 7690 }, { "epoch": 1.12, "grad_norm": 6.465636253356934, "learning_rate": 1.465768599206338e-06, "loss": 0.5042, "step": 7691 }, { "epoch": 1.12, "grad_norm": 7.203210353851318, "learning_rate": 1.4656285280794628e-06, "loss": 0.5622, "step": 7692 }, { "epoch": 1.12, "grad_norm": 6.291310787200928, "learning_rate": 1.4654884452870217e-06, "loss": 0.5471, "step": 7693 }, { "epoch": 1.12, "grad_norm": 5.625500679016113, "learning_rate": 1.4653483508325237e-06, "loss": 0.548, "step": 7694 }, { "epoch": 1.12, "grad_norm": 6.062239170074463, "learning_rate": 1.465208244719479e-06, "loss": 0.6043, "step": 7695 }, { "epoch": 1.12, "grad_norm": 7.052718639373779, "learning_rate": 1.4650681269513975e-06, "loss": 0.5131, "step": 7696 }, { "epoch": 1.12, "grad_norm": 6.017605781555176, "learning_rate": 1.46492799753179e-06, "loss": 0.571, "step": 7697 }, { "epoch": 1.12, "grad_norm": 6.474491596221924, "learning_rate": 1.464787856464167e-06, "loss": 0.6036, "step": 7698 }, { "epoch": 1.12, "grad_norm": 6.10147762298584, "learning_rate": 1.4646477037520391e-06, "loss": 0.5979, "step": 7699 }, { "epoch": 1.12, "grad_norm": 6.546785354614258, "learning_rate": 1.4645075393989178e-06, "loss": 0.5601, "step": 7700 }, { "epoch": 1.12, "grad_norm": 6.681352615356445, "learning_rate": 1.4643673634083154e-06, "loss": 0.5734, "step": 7701 }, { "epoch": 1.12, "grad_norm": 6.4711503982543945, "learning_rate": 1.4642271757837428e-06, "loss": 0.5909, "step": 7702 }, { "epoch": 1.12, "grad_norm": 6.848607540130615, "learning_rate": 1.4640869765287127e-06, "loss": 0.5718, "step": 7703 }, { "epoch": 1.12, "grad_norm": 7.1923041343688965, "learning_rate": 1.4639467656467374e-06, "loss": 0.5686, "step": 7704 }, { "epoch": 1.12, "grad_norm": 6.662101745605469, "learning_rate": 1.46380654314133e-06, "loss": 0.5223, "step": 7705 }, { "epoch": 1.12, "grad_norm": 6.857651233673096, "learning_rate": 1.4636663090160033e-06, "loss": 0.6691, "step": 7706 }, { "epoch": 1.12, "grad_norm": 6.746402263641357, "learning_rate": 1.4635260632742704e-06, "loss": 0.6144, "step": 7707 }, { "epoch": 1.12, "grad_norm": 6.342393398284912, "learning_rate": 1.4633858059196452e-06, "loss": 0.5674, "step": 7708 }, { "epoch": 1.12, "grad_norm": 5.695418834686279, "learning_rate": 1.4632455369556412e-06, "loss": 0.49, "step": 7709 }, { "epoch": 1.12, "grad_norm": 6.228631019592285, "learning_rate": 1.4631052563857731e-06, "loss": 0.5037, "step": 7710 }, { "epoch": 1.12, "grad_norm": 6.611648082733154, "learning_rate": 1.4629649642135558e-06, "loss": 0.5646, "step": 7711 }, { "epoch": 1.12, "grad_norm": 6.291491508483887, "learning_rate": 1.4628246604425032e-06, "loss": 0.5715, "step": 7712 }, { "epoch": 1.12, "grad_norm": 6.263856887817383, "learning_rate": 1.4626843450761305e-06, "loss": 0.5409, "step": 7713 }, { "epoch": 1.12, "grad_norm": 5.817803382873535, "learning_rate": 1.4625440181179536e-06, "loss": 0.4929, "step": 7714 }, { "epoch": 1.12, "grad_norm": 5.957472324371338, "learning_rate": 1.4624036795714881e-06, "loss": 0.5488, "step": 7715 }, { "epoch": 1.12, "grad_norm": 6.827146053314209, "learning_rate": 1.4622633294402494e-06, "loss": 0.5456, "step": 7716 }, { "epoch": 1.12, "grad_norm": 5.974592685699463, "learning_rate": 1.462122967727754e-06, "loss": 0.4771, "step": 7717 }, { "epoch": 1.12, "grad_norm": 6.5132927894592285, "learning_rate": 1.4619825944375187e-06, "loss": 0.5248, "step": 7718 }, { "epoch": 1.12, "grad_norm": 5.814718723297119, "learning_rate": 1.4618422095730602e-06, "loss": 0.519, "step": 7719 }, { "epoch": 1.12, "grad_norm": 6.609060287475586, "learning_rate": 1.4617018131378955e-06, "loss": 0.6298, "step": 7720 }, { "epoch": 1.12, "grad_norm": 6.0293073654174805, "learning_rate": 1.461561405135542e-06, "loss": 0.559, "step": 7721 }, { "epoch": 1.12, "grad_norm": 7.397867202758789, "learning_rate": 1.4614209855695175e-06, "loss": 0.6781, "step": 7722 }, { "epoch": 1.12, "grad_norm": 5.879295349121094, "learning_rate": 1.4612805544433397e-06, "loss": 0.5289, "step": 7723 }, { "epoch": 1.12, "grad_norm": 7.157548904418945, "learning_rate": 1.4611401117605274e-06, "loss": 0.5644, "step": 7724 }, { "epoch": 1.12, "grad_norm": 7.4404826164245605, "learning_rate": 1.4609996575245988e-06, "loss": 0.6051, "step": 7725 }, { "epoch": 1.12, "grad_norm": 6.752739429473877, "learning_rate": 1.460859191739073e-06, "loss": 0.5104, "step": 7726 }, { "epoch": 1.12, "grad_norm": 6.04580545425415, "learning_rate": 1.4607187144074685e-06, "loss": 0.5046, "step": 7727 }, { "epoch": 1.12, "grad_norm": 6.728641986846924, "learning_rate": 1.4605782255333056e-06, "loss": 0.5234, "step": 7728 }, { "epoch": 1.12, "grad_norm": 5.9992804527282715, "learning_rate": 1.4604377251201037e-06, "loss": 0.5043, "step": 7729 }, { "epoch": 1.12, "grad_norm": 6.046590328216553, "learning_rate": 1.4602972131713825e-06, "loss": 0.5288, "step": 7730 }, { "epoch": 1.12, "grad_norm": 6.909222602844238, "learning_rate": 1.4601566896906625e-06, "loss": 0.5584, "step": 7731 }, { "epoch": 1.12, "grad_norm": 6.577939987182617, "learning_rate": 1.4600161546814644e-06, "loss": 0.5735, "step": 7732 }, { "epoch": 1.12, "grad_norm": 6.679472923278809, "learning_rate": 1.459875608147309e-06, "loss": 0.5283, "step": 7733 }, { "epoch": 1.12, "grad_norm": 6.073601245880127, "learning_rate": 1.4597350500917176e-06, "loss": 0.4658, "step": 7734 }, { "epoch": 1.12, "grad_norm": 5.789379596710205, "learning_rate": 1.4595944805182112e-06, "loss": 0.5, "step": 7735 }, { "epoch": 1.12, "grad_norm": 5.692552089691162, "learning_rate": 1.459453899430312e-06, "loss": 0.5067, "step": 7736 }, { "epoch": 1.12, "grad_norm": 6.095218658447266, "learning_rate": 1.4593133068315417e-06, "loss": 0.5352, "step": 7737 }, { "epoch": 1.12, "grad_norm": 6.6562323570251465, "learning_rate": 1.459172702725423e-06, "loss": 0.6227, "step": 7738 }, { "epoch": 1.12, "grad_norm": 6.488309860229492, "learning_rate": 1.4590320871154785e-06, "loss": 0.5936, "step": 7739 }, { "epoch": 1.12, "grad_norm": 6.1805620193481445, "learning_rate": 1.4588914600052307e-06, "loss": 0.5256, "step": 7740 }, { "epoch": 1.12, "grad_norm": 6.516138076782227, "learning_rate": 1.458750821398203e-06, "loss": 0.6351, "step": 7741 }, { "epoch": 1.12, "grad_norm": 5.599583625793457, "learning_rate": 1.4586101712979187e-06, "loss": 0.5333, "step": 7742 }, { "epoch": 1.12, "grad_norm": 5.757795810699463, "learning_rate": 1.4584695097079018e-06, "loss": 0.5479, "step": 7743 }, { "epoch": 1.12, "grad_norm": 6.312727928161621, "learning_rate": 1.4583288366316764e-06, "loss": 0.5177, "step": 7744 }, { "epoch": 1.12, "grad_norm": 5.887009143829346, "learning_rate": 1.4581881520727666e-06, "loss": 0.5617, "step": 7745 }, { "epoch": 1.12, "grad_norm": 6.962730884552002, "learning_rate": 1.458047456034697e-06, "loss": 0.6631, "step": 7746 }, { "epoch": 1.12, "grad_norm": 6.047032356262207, "learning_rate": 1.457906748520993e-06, "loss": 0.4949, "step": 7747 }, { "epoch": 1.12, "grad_norm": 7.355362892150879, "learning_rate": 1.4577660295351795e-06, "loss": 0.6112, "step": 7748 }, { "epoch": 1.12, "grad_norm": 6.38485860824585, "learning_rate": 1.4576252990807815e-06, "loss": 0.6126, "step": 7749 }, { "epoch": 1.12, "grad_norm": 6.235472202301025, "learning_rate": 1.4574845571613255e-06, "loss": 0.5692, "step": 7750 }, { "epoch": 1.12, "grad_norm": 6.497649669647217, "learning_rate": 1.457343803780337e-06, "loss": 0.5456, "step": 7751 }, { "epoch": 1.12, "grad_norm": 5.475578308105469, "learning_rate": 1.4572030389413429e-06, "loss": 0.4884, "step": 7752 }, { "epoch": 1.12, "grad_norm": 6.203109264373779, "learning_rate": 1.4570622626478693e-06, "loss": 0.5583, "step": 7753 }, { "epoch": 1.13, "grad_norm": 5.964710235595703, "learning_rate": 1.4569214749034436e-06, "loss": 0.539, "step": 7754 }, { "epoch": 1.13, "grad_norm": 6.214448928833008, "learning_rate": 1.4567806757115924e-06, "loss": 0.5484, "step": 7755 }, { "epoch": 1.13, "grad_norm": 6.198176383972168, "learning_rate": 1.4566398650758438e-06, "loss": 0.5315, "step": 7756 }, { "epoch": 1.13, "grad_norm": 7.1243062019348145, "learning_rate": 1.4564990429997251e-06, "loss": 0.5408, "step": 7757 }, { "epoch": 1.13, "grad_norm": 7.052892208099365, "learning_rate": 1.4563582094867647e-06, "loss": 0.5985, "step": 7758 }, { "epoch": 1.13, "grad_norm": 5.558843612670898, "learning_rate": 1.4562173645404911e-06, "loss": 0.5687, "step": 7759 }, { "epoch": 1.13, "grad_norm": 6.068074703216553, "learning_rate": 1.4560765081644326e-06, "loss": 0.5681, "step": 7760 }, { "epoch": 1.13, "grad_norm": 6.814345836639404, "learning_rate": 1.4559356403621185e-06, "loss": 0.534, "step": 7761 }, { "epoch": 1.13, "grad_norm": 6.014415264129639, "learning_rate": 1.455794761137077e-06, "loss": 0.4748, "step": 7762 }, { "epoch": 1.13, "grad_norm": 6.740880012512207, "learning_rate": 1.455653870492839e-06, "loss": 0.5779, "step": 7763 }, { "epoch": 1.13, "grad_norm": 6.368439674377441, "learning_rate": 1.4555129684329333e-06, "loss": 0.5477, "step": 7764 }, { "epoch": 1.13, "grad_norm": 6.205441951751709, "learning_rate": 1.4553720549608904e-06, "loss": 0.517, "step": 7765 }, { "epoch": 1.13, "grad_norm": 5.728425979614258, "learning_rate": 1.4552311300802404e-06, "loss": 0.5081, "step": 7766 }, { "epoch": 1.13, "grad_norm": 5.843567371368408, "learning_rate": 1.4550901937945145e-06, "loss": 0.5735, "step": 7767 }, { "epoch": 1.13, "grad_norm": 6.386022090911865, "learning_rate": 1.454949246107243e-06, "loss": 0.5572, "step": 7768 }, { "epoch": 1.13, "grad_norm": 6.424267768859863, "learning_rate": 1.4548082870219576e-06, "loss": 0.6299, "step": 7769 }, { "epoch": 1.13, "grad_norm": 7.244176387786865, "learning_rate": 1.4546673165421892e-06, "loss": 0.6043, "step": 7770 }, { "epoch": 1.13, "grad_norm": 6.729500770568848, "learning_rate": 1.45452633467147e-06, "loss": 0.5622, "step": 7771 }, { "epoch": 1.13, "grad_norm": 7.003281593322754, "learning_rate": 1.4543853414133323e-06, "loss": 0.6384, "step": 7772 }, { "epoch": 1.13, "grad_norm": 6.826751232147217, "learning_rate": 1.4542443367713077e-06, "loss": 0.6052, "step": 7773 }, { "epoch": 1.13, "grad_norm": 6.007393836975098, "learning_rate": 1.4541033207489298e-06, "loss": 0.4791, "step": 7774 }, { "epoch": 1.13, "grad_norm": 6.380312442779541, "learning_rate": 1.4539622933497309e-06, "loss": 0.5056, "step": 7775 }, { "epoch": 1.13, "grad_norm": 5.435148239135742, "learning_rate": 1.4538212545772445e-06, "loss": 0.5529, "step": 7776 }, { "epoch": 1.13, "grad_norm": 6.643627166748047, "learning_rate": 1.4536802044350036e-06, "loss": 0.5862, "step": 7777 }, { "epoch": 1.13, "grad_norm": 5.905539035797119, "learning_rate": 1.4535391429265425e-06, "loss": 0.5049, "step": 7778 }, { "epoch": 1.13, "grad_norm": 5.7708353996276855, "learning_rate": 1.453398070055395e-06, "loss": 0.4747, "step": 7779 }, { "epoch": 1.13, "grad_norm": 6.754214286804199, "learning_rate": 1.4532569858250953e-06, "loss": 0.5222, "step": 7780 }, { "epoch": 1.13, "grad_norm": 6.519301891326904, "learning_rate": 1.4531158902391788e-06, "loss": 0.5381, "step": 7781 }, { "epoch": 1.13, "grad_norm": 5.959908962249756, "learning_rate": 1.45297478330118e-06, "loss": 0.5213, "step": 7782 }, { "epoch": 1.13, "grad_norm": 5.840351581573486, "learning_rate": 1.4528336650146335e-06, "loss": 0.5165, "step": 7783 }, { "epoch": 1.13, "grad_norm": 6.1187334060668945, "learning_rate": 1.4526925353830755e-06, "loss": 0.4902, "step": 7784 }, { "epoch": 1.13, "grad_norm": 6.853916168212891, "learning_rate": 1.4525513944100416e-06, "loss": 0.559, "step": 7785 }, { "epoch": 1.13, "grad_norm": 5.6044769287109375, "learning_rate": 1.4524102420990679e-06, "loss": 0.4996, "step": 7786 }, { "epoch": 1.13, "grad_norm": 6.462498188018799, "learning_rate": 1.4522690784536904e-06, "loss": 0.5158, "step": 7787 }, { "epoch": 1.13, "grad_norm": 6.141416072845459, "learning_rate": 1.4521279034774464e-06, "loss": 0.5628, "step": 7788 }, { "epoch": 1.13, "grad_norm": 5.67054557800293, "learning_rate": 1.451986717173872e-06, "loss": 0.4865, "step": 7789 }, { "epoch": 1.13, "grad_norm": 6.638569355010986, "learning_rate": 1.4518455195465049e-06, "loss": 0.5814, "step": 7790 }, { "epoch": 1.13, "grad_norm": 6.943526744842529, "learning_rate": 1.4517043105988825e-06, "loss": 0.632, "step": 7791 }, { "epoch": 1.13, "grad_norm": 6.818483352661133, "learning_rate": 1.4515630903345428e-06, "loss": 0.6031, "step": 7792 }, { "epoch": 1.13, "grad_norm": 6.218172073364258, "learning_rate": 1.4514218587570233e-06, "loss": 0.5144, "step": 7793 }, { "epoch": 1.13, "grad_norm": 5.861682891845703, "learning_rate": 1.4512806158698627e-06, "loss": 0.5232, "step": 7794 }, { "epoch": 1.13, "grad_norm": 6.9351606369018555, "learning_rate": 1.4511393616765997e-06, "loss": 0.4741, "step": 7795 }, { "epoch": 1.13, "grad_norm": 6.258444786071777, "learning_rate": 1.4509980961807728e-06, "loss": 0.4662, "step": 7796 }, { "epoch": 1.13, "grad_norm": 6.49799108505249, "learning_rate": 1.4508568193859217e-06, "loss": 0.5387, "step": 7797 }, { "epoch": 1.13, "grad_norm": 5.459583282470703, "learning_rate": 1.450715531295585e-06, "loss": 0.5622, "step": 7798 }, { "epoch": 1.13, "grad_norm": 6.284205436706543, "learning_rate": 1.4505742319133035e-06, "loss": 0.5349, "step": 7799 }, { "epoch": 1.13, "grad_norm": 6.472721576690674, "learning_rate": 1.4504329212426165e-06, "loss": 0.4932, "step": 7800 }, { "epoch": 1.13, "grad_norm": 6.141124725341797, "learning_rate": 1.4502915992870644e-06, "loss": 0.5323, "step": 7801 }, { "epoch": 1.13, "grad_norm": 6.045468807220459, "learning_rate": 1.4501502660501883e-06, "loss": 0.4916, "step": 7802 }, { "epoch": 1.13, "grad_norm": 5.771701812744141, "learning_rate": 1.4500089215355285e-06, "loss": 0.4949, "step": 7803 }, { "epoch": 1.13, "grad_norm": 6.058821201324463, "learning_rate": 1.4498675657466264e-06, "loss": 0.4987, "step": 7804 }, { "epoch": 1.13, "grad_norm": 6.326629161834717, "learning_rate": 1.4497261986870234e-06, "loss": 0.4943, "step": 7805 }, { "epoch": 1.13, "grad_norm": 6.170173168182373, "learning_rate": 1.449584820360261e-06, "loss": 0.5306, "step": 7806 }, { "epoch": 1.13, "grad_norm": 6.658072471618652, "learning_rate": 1.4494434307698814e-06, "loss": 0.5753, "step": 7807 }, { "epoch": 1.13, "grad_norm": 7.053719520568848, "learning_rate": 1.4493020299194273e-06, "loss": 0.5764, "step": 7808 }, { "epoch": 1.13, "grad_norm": 5.6702775955200195, "learning_rate": 1.4491606178124408e-06, "loss": 0.4877, "step": 7809 }, { "epoch": 1.13, "grad_norm": 6.4007954597473145, "learning_rate": 1.4490191944524649e-06, "loss": 0.5617, "step": 7810 }, { "epoch": 1.13, "grad_norm": 5.976220607757568, "learning_rate": 1.4488777598430425e-06, "loss": 0.5361, "step": 7811 }, { "epoch": 1.13, "grad_norm": 7.054795265197754, "learning_rate": 1.448736313987717e-06, "loss": 0.6036, "step": 7812 }, { "epoch": 1.13, "grad_norm": 5.749596118927002, "learning_rate": 1.4485948568900323e-06, "loss": 0.5237, "step": 7813 }, { "epoch": 1.13, "grad_norm": 6.729485034942627, "learning_rate": 1.4484533885535323e-06, "loss": 0.6475, "step": 7814 }, { "epoch": 1.13, "grad_norm": 7.743664264678955, "learning_rate": 1.4483119089817615e-06, "loss": 0.6732, "step": 7815 }, { "epoch": 1.13, "grad_norm": 5.988935947418213, "learning_rate": 1.4481704181782644e-06, "loss": 0.5379, "step": 7816 }, { "epoch": 1.13, "grad_norm": 6.302181243896484, "learning_rate": 1.4480289161465854e-06, "loss": 0.5202, "step": 7817 }, { "epoch": 1.13, "grad_norm": 6.115719795227051, "learning_rate": 1.4478874028902701e-06, "loss": 0.5333, "step": 7818 }, { "epoch": 1.13, "grad_norm": 6.260828495025635, "learning_rate": 1.4477458784128635e-06, "loss": 0.5861, "step": 7819 }, { "epoch": 1.13, "grad_norm": 5.80215311050415, "learning_rate": 1.4476043427179114e-06, "loss": 0.5505, "step": 7820 }, { "epoch": 1.13, "grad_norm": 5.65122652053833, "learning_rate": 1.4474627958089598e-06, "loss": 0.5432, "step": 7821 }, { "epoch": 1.13, "grad_norm": 6.097949028015137, "learning_rate": 1.447321237689555e-06, "loss": 0.4989, "step": 7822 }, { "epoch": 1.14, "grad_norm": 6.35352897644043, "learning_rate": 1.4471796683632435e-06, "loss": 0.5401, "step": 7823 }, { "epoch": 1.14, "grad_norm": 7.319899082183838, "learning_rate": 1.447038087833572e-06, "loss": 0.5707, "step": 7824 }, { "epoch": 1.14, "grad_norm": 6.709521770477295, "learning_rate": 1.4468964961040873e-06, "loss": 0.6522, "step": 7825 }, { "epoch": 1.14, "grad_norm": 7.871204376220703, "learning_rate": 1.446754893178337e-06, "loss": 0.527, "step": 7826 }, { "epoch": 1.14, "grad_norm": 6.6310648918151855, "learning_rate": 1.446613279059869e-06, "loss": 0.5375, "step": 7827 }, { "epoch": 1.14, "grad_norm": 6.566380500793457, "learning_rate": 1.4464716537522307e-06, "loss": 0.5067, "step": 7828 }, { "epoch": 1.14, "grad_norm": 6.718559741973877, "learning_rate": 1.446330017258971e-06, "loss": 0.5469, "step": 7829 }, { "epoch": 1.14, "grad_norm": 6.514605522155762, "learning_rate": 1.4461883695836378e-06, "loss": 0.5315, "step": 7830 }, { "epoch": 1.14, "grad_norm": 6.00870943069458, "learning_rate": 1.4460467107297797e-06, "loss": 0.5502, "step": 7831 }, { "epoch": 1.14, "grad_norm": 6.513839244842529, "learning_rate": 1.4459050407009463e-06, "loss": 0.569, "step": 7832 }, { "epoch": 1.14, "grad_norm": 6.1966023445129395, "learning_rate": 1.4457633595006864e-06, "loss": 0.543, "step": 7833 }, { "epoch": 1.14, "grad_norm": 6.440255641937256, "learning_rate": 1.4456216671325498e-06, "loss": 0.5711, "step": 7834 }, { "epoch": 1.14, "grad_norm": 5.94866418838501, "learning_rate": 1.4454799636000864e-06, "loss": 0.5104, "step": 7835 }, { "epoch": 1.14, "grad_norm": 5.806567192077637, "learning_rate": 1.4453382489068464e-06, "loss": 0.5482, "step": 7836 }, { "epoch": 1.14, "grad_norm": 6.109949588775635, "learning_rate": 1.4451965230563803e-06, "loss": 0.5379, "step": 7837 }, { "epoch": 1.14, "grad_norm": 6.1845879554748535, "learning_rate": 1.4450547860522387e-06, "loss": 0.6127, "step": 7838 }, { "epoch": 1.14, "grad_norm": 6.392467021942139, "learning_rate": 1.4449130378979723e-06, "loss": 0.5161, "step": 7839 }, { "epoch": 1.14, "grad_norm": 6.654178619384766, "learning_rate": 1.444771278597133e-06, "loss": 0.5193, "step": 7840 }, { "epoch": 1.14, "grad_norm": 6.407243251800537, "learning_rate": 1.4446295081532712e-06, "loss": 0.5953, "step": 7841 }, { "epoch": 1.14, "grad_norm": 6.397729396820068, "learning_rate": 1.4444877265699401e-06, "loss": 0.4422, "step": 7842 }, { "epoch": 1.14, "grad_norm": 6.400407791137695, "learning_rate": 1.4443459338506914e-06, "loss": 0.546, "step": 7843 }, { "epoch": 1.14, "grad_norm": 7.369271755218506, "learning_rate": 1.4442041299990772e-06, "loss": 0.5845, "step": 7844 }, { "epoch": 1.14, "grad_norm": 7.130178928375244, "learning_rate": 1.4440623150186505e-06, "loss": 0.5514, "step": 7845 }, { "epoch": 1.14, "grad_norm": 6.058858871459961, "learning_rate": 1.4439204889129637e-06, "loss": 0.5457, "step": 7846 }, { "epoch": 1.14, "grad_norm": 6.748690605163574, "learning_rate": 1.4437786516855703e-06, "loss": 0.5172, "step": 7847 }, { "epoch": 1.14, "grad_norm": 6.3277058601379395, "learning_rate": 1.4436368033400238e-06, "loss": 0.5602, "step": 7848 }, { "epoch": 1.14, "grad_norm": 5.985226154327393, "learning_rate": 1.4434949438798783e-06, "loss": 0.5354, "step": 7849 }, { "epoch": 1.14, "grad_norm": 6.936526775360107, "learning_rate": 1.4433530733086873e-06, "loss": 0.5161, "step": 7850 }, { "epoch": 1.14, "grad_norm": 6.774054050445557, "learning_rate": 1.4432111916300058e-06, "loss": 0.5756, "step": 7851 }, { "epoch": 1.14, "grad_norm": 7.111387252807617, "learning_rate": 1.4430692988473879e-06, "loss": 0.5462, "step": 7852 }, { "epoch": 1.14, "grad_norm": 5.271725177764893, "learning_rate": 1.4429273949643883e-06, "loss": 0.5505, "step": 7853 }, { "epoch": 1.14, "grad_norm": 6.823106288909912, "learning_rate": 1.4427854799845626e-06, "loss": 0.5788, "step": 7854 }, { "epoch": 1.14, "grad_norm": 6.561315059661865, "learning_rate": 1.4426435539114664e-06, "loss": 0.5791, "step": 7855 }, { "epoch": 1.14, "grad_norm": 6.403024196624756, "learning_rate": 1.442501616748655e-06, "loss": 0.5621, "step": 7856 }, { "epoch": 1.14, "grad_norm": 6.431491851806641, "learning_rate": 1.4423596684996846e-06, "loss": 0.5585, "step": 7857 }, { "epoch": 1.14, "grad_norm": 6.25385046005249, "learning_rate": 1.4422177091681118e-06, "loss": 0.5884, "step": 7858 }, { "epoch": 1.14, "grad_norm": 5.584990978240967, "learning_rate": 1.4420757387574925e-06, "loss": 0.5631, "step": 7859 }, { "epoch": 1.14, "grad_norm": 6.132287502288818, "learning_rate": 1.441933757271384e-06, "loss": 0.5276, "step": 7860 }, { "epoch": 1.14, "grad_norm": 6.471119403839111, "learning_rate": 1.441791764713343e-06, "loss": 0.5534, "step": 7861 }, { "epoch": 1.14, "grad_norm": 6.529298782348633, "learning_rate": 1.4416497610869275e-06, "loss": 0.5751, "step": 7862 }, { "epoch": 1.14, "grad_norm": 6.332345485687256, "learning_rate": 1.4415077463956946e-06, "loss": 0.5641, "step": 7863 }, { "epoch": 1.14, "grad_norm": 6.783633232116699, "learning_rate": 1.4413657206432023e-06, "loss": 0.5693, "step": 7864 }, { "epoch": 1.14, "grad_norm": 6.066470623016357, "learning_rate": 1.4412236838330094e-06, "loss": 0.5285, "step": 7865 }, { "epoch": 1.14, "grad_norm": 5.343722820281982, "learning_rate": 1.4410816359686741e-06, "loss": 0.4961, "step": 7866 }, { "epoch": 1.14, "grad_norm": 6.033927917480469, "learning_rate": 1.4409395770537548e-06, "loss": 0.5264, "step": 7867 }, { "epoch": 1.14, "grad_norm": 7.134608268737793, "learning_rate": 1.4407975070918107e-06, "loss": 0.6107, "step": 7868 }, { "epoch": 1.14, "grad_norm": 5.889811992645264, "learning_rate": 1.4406554260864017e-06, "loss": 0.543, "step": 7869 }, { "epoch": 1.14, "grad_norm": 7.355950832366943, "learning_rate": 1.4405133340410867e-06, "loss": 0.5178, "step": 7870 }, { "epoch": 1.14, "grad_norm": 6.312981605529785, "learning_rate": 1.440371230959426e-06, "loss": 0.5825, "step": 7871 }, { "epoch": 1.14, "grad_norm": 6.697200775146484, "learning_rate": 1.4402291168449799e-06, "loss": 0.5282, "step": 7872 }, { "epoch": 1.14, "grad_norm": 6.219459533691406, "learning_rate": 1.440086991701308e-06, "loss": 0.5633, "step": 7873 }, { "epoch": 1.14, "grad_norm": 5.493391990661621, "learning_rate": 1.439944855531972e-06, "loss": 0.4593, "step": 7874 }, { "epoch": 1.14, "grad_norm": 6.44678258895874, "learning_rate": 1.4398027083405325e-06, "loss": 0.5188, "step": 7875 }, { "epoch": 1.14, "grad_norm": 6.880377769470215, "learning_rate": 1.4396605501305502e-06, "loss": 0.5394, "step": 7876 }, { "epoch": 1.14, "grad_norm": 6.361232280731201, "learning_rate": 1.4395183809055878e-06, "loss": 0.5605, "step": 7877 }, { "epoch": 1.14, "grad_norm": 5.816461563110352, "learning_rate": 1.4393762006692062e-06, "loss": 0.5538, "step": 7878 }, { "epoch": 1.14, "grad_norm": 5.536167621612549, "learning_rate": 1.4392340094249682e-06, "loss": 0.5028, "step": 7879 }, { "epoch": 1.14, "grad_norm": 5.980853080749512, "learning_rate": 1.4390918071764354e-06, "loss": 0.5395, "step": 7880 }, { "epoch": 1.14, "grad_norm": 6.9768147468566895, "learning_rate": 1.438949593927171e-06, "loss": 0.5958, "step": 7881 }, { "epoch": 1.14, "grad_norm": 6.5704569816589355, "learning_rate": 1.4388073696807378e-06, "loss": 0.5519, "step": 7882 }, { "epoch": 1.14, "grad_norm": 6.035661697387695, "learning_rate": 1.4386651344406986e-06, "loss": 0.4941, "step": 7883 }, { "epoch": 1.14, "grad_norm": 6.1615824699401855, "learning_rate": 1.4385228882106174e-06, "loss": 0.5974, "step": 7884 }, { "epoch": 1.14, "grad_norm": 5.878805160522461, "learning_rate": 1.4383806309940579e-06, "loss": 0.5329, "step": 7885 }, { "epoch": 1.14, "grad_norm": 6.5528669357299805, "learning_rate": 1.4382383627945837e-06, "loss": 0.5781, "step": 7886 }, { "epoch": 1.14, "grad_norm": 6.702714920043945, "learning_rate": 1.4380960836157596e-06, "loss": 0.5714, "step": 7887 }, { "epoch": 1.14, "grad_norm": 5.62559700012207, "learning_rate": 1.43795379346115e-06, "loss": 0.4598, "step": 7888 }, { "epoch": 1.14, "grad_norm": 6.161458969116211, "learning_rate": 1.4378114923343195e-06, "loss": 0.5212, "step": 7889 }, { "epoch": 1.14, "grad_norm": 5.858314037322998, "learning_rate": 1.4376691802388335e-06, "loss": 0.5118, "step": 7890 }, { "epoch": 1.14, "grad_norm": 6.097862720489502, "learning_rate": 1.4375268571782574e-06, "loss": 0.5241, "step": 7891 }, { "epoch": 1.15, "grad_norm": 5.934686660766602, "learning_rate": 1.4373845231561567e-06, "loss": 0.557, "step": 7892 }, { "epoch": 1.15, "grad_norm": 6.993984222412109, "learning_rate": 1.4372421781760974e-06, "loss": 0.5782, "step": 7893 }, { "epoch": 1.15, "grad_norm": 6.448337078094482, "learning_rate": 1.437099822241646e-06, "loss": 0.4921, "step": 7894 }, { "epoch": 1.15, "grad_norm": 6.543298244476318, "learning_rate": 1.4369574553563687e-06, "loss": 0.5521, "step": 7895 }, { "epoch": 1.15, "grad_norm": 6.5774617195129395, "learning_rate": 1.436815077523832e-06, "loss": 0.6688, "step": 7896 }, { "epoch": 1.15, "grad_norm": 6.943669319152832, "learning_rate": 1.4366726887476035e-06, "loss": 0.5647, "step": 7897 }, { "epoch": 1.15, "grad_norm": 6.039862155914307, "learning_rate": 1.4365302890312502e-06, "loss": 0.5668, "step": 7898 }, { "epoch": 1.15, "grad_norm": 7.230013370513916, "learning_rate": 1.4363878783783404e-06, "loss": 0.5256, "step": 7899 }, { "epoch": 1.15, "grad_norm": 5.6133317947387695, "learning_rate": 1.4362454567924407e-06, "loss": 0.5241, "step": 7900 }, { "epoch": 1.15, "grad_norm": 5.980051517486572, "learning_rate": 1.4361030242771202e-06, "loss": 0.5408, "step": 7901 }, { "epoch": 1.15, "grad_norm": 5.854536533355713, "learning_rate": 1.435960580835947e-06, "loss": 0.5581, "step": 7902 }, { "epoch": 1.15, "grad_norm": 6.664938449859619, "learning_rate": 1.4358181264724896e-06, "loss": 0.5702, "step": 7903 }, { "epoch": 1.15, "grad_norm": 7.175311088562012, "learning_rate": 1.4356756611903173e-06, "loss": 0.565, "step": 7904 }, { "epoch": 1.15, "grad_norm": 6.380574703216553, "learning_rate": 1.4355331849929993e-06, "loss": 0.5642, "step": 7905 }, { "epoch": 1.15, "grad_norm": 6.933131694793701, "learning_rate": 1.435390697884105e-06, "loss": 0.5998, "step": 7906 }, { "epoch": 1.15, "grad_norm": 6.127513885498047, "learning_rate": 1.435248199867204e-06, "loss": 0.5232, "step": 7907 }, { "epoch": 1.15, "grad_norm": 6.527799129486084, "learning_rate": 1.4351056909458668e-06, "loss": 0.5066, "step": 7908 }, { "epoch": 1.15, "grad_norm": 7.1474690437316895, "learning_rate": 1.4349631711236634e-06, "loss": 0.592, "step": 7909 }, { "epoch": 1.15, "grad_norm": 6.541923522949219, "learning_rate": 1.4348206404041644e-06, "loss": 0.5834, "step": 7910 }, { "epoch": 1.15, "grad_norm": 6.008877277374268, "learning_rate": 1.434678098790941e-06, "loss": 0.4863, "step": 7911 }, { "epoch": 1.15, "grad_norm": 6.064561367034912, "learning_rate": 1.4345355462875638e-06, "loss": 0.5185, "step": 7912 }, { "epoch": 1.15, "grad_norm": 6.720149040222168, "learning_rate": 1.4343929828976048e-06, "loss": 0.5248, "step": 7913 }, { "epoch": 1.15, "grad_norm": 7.167924880981445, "learning_rate": 1.4342504086246354e-06, "loss": 0.6023, "step": 7914 }, { "epoch": 1.15, "grad_norm": 5.876843452453613, "learning_rate": 1.4341078234722275e-06, "loss": 0.5065, "step": 7915 }, { "epoch": 1.15, "grad_norm": 6.662808418273926, "learning_rate": 1.4339652274439534e-06, "loss": 0.5621, "step": 7916 }, { "epoch": 1.15, "grad_norm": 6.214193820953369, "learning_rate": 1.4338226205433857e-06, "loss": 0.531, "step": 7917 }, { "epoch": 1.15, "grad_norm": 6.756522178649902, "learning_rate": 1.4336800027740969e-06, "loss": 0.5204, "step": 7918 }, { "epoch": 1.15, "grad_norm": 6.0330328941345215, "learning_rate": 1.4335373741396604e-06, "loss": 0.4795, "step": 7919 }, { "epoch": 1.15, "grad_norm": 6.876089572906494, "learning_rate": 1.4333947346436496e-06, "loss": 0.4869, "step": 7920 }, { "epoch": 1.15, "grad_norm": 6.246021747589111, "learning_rate": 1.4332520842896378e-06, "loss": 0.5391, "step": 7921 }, { "epoch": 1.15, "grad_norm": 6.86798095703125, "learning_rate": 1.433109423081199e-06, "loss": 0.5693, "step": 7922 }, { "epoch": 1.15, "grad_norm": 6.288329124450684, "learning_rate": 1.4329667510219075e-06, "loss": 0.4989, "step": 7923 }, { "epoch": 1.15, "grad_norm": 6.410622596740723, "learning_rate": 1.4328240681153374e-06, "loss": 0.5698, "step": 7924 }, { "epoch": 1.15, "grad_norm": 6.070080757141113, "learning_rate": 1.4326813743650634e-06, "loss": 0.4582, "step": 7925 }, { "epoch": 1.15, "grad_norm": 6.986623287200928, "learning_rate": 1.4325386697746606e-06, "loss": 0.5751, "step": 7926 }, { "epoch": 1.15, "grad_norm": 5.806697845458984, "learning_rate": 1.4323959543477047e-06, "loss": 0.4724, "step": 7927 }, { "epoch": 1.15, "grad_norm": 6.6910176277160645, "learning_rate": 1.4322532280877704e-06, "loss": 0.5859, "step": 7928 }, { "epoch": 1.15, "grad_norm": 7.538322925567627, "learning_rate": 1.432110490998434e-06, "loss": 0.5337, "step": 7929 }, { "epoch": 1.15, "grad_norm": 7.4185261726379395, "learning_rate": 1.4319677430832707e-06, "loss": 0.5513, "step": 7930 }, { "epoch": 1.15, "grad_norm": 5.888252258300781, "learning_rate": 1.4318249843458582e-06, "loss": 0.5326, "step": 7931 }, { "epoch": 1.15, "grad_norm": 5.499440670013428, "learning_rate": 1.431682214789772e-06, "loss": 0.5023, "step": 7932 }, { "epoch": 1.15, "grad_norm": 6.201111793518066, "learning_rate": 1.4315394344185894e-06, "loss": 0.5799, "step": 7933 }, { "epoch": 1.15, "grad_norm": 5.996991157531738, "learning_rate": 1.4313966432358874e-06, "loss": 0.502, "step": 7934 }, { "epoch": 1.15, "grad_norm": 6.346124649047852, "learning_rate": 1.4312538412452437e-06, "loss": 0.51, "step": 7935 }, { "epoch": 1.15, "grad_norm": 6.563859462738037, "learning_rate": 1.4311110284502354e-06, "loss": 0.5973, "step": 7936 }, { "epoch": 1.15, "grad_norm": 6.507368564605713, "learning_rate": 1.4309682048544409e-06, "loss": 0.5474, "step": 7937 }, { "epoch": 1.15, "grad_norm": 6.876960277557373, "learning_rate": 1.4308253704614384e-06, "loss": 0.6022, "step": 7938 }, { "epoch": 1.15, "grad_norm": 6.816725730895996, "learning_rate": 1.430682525274806e-06, "loss": 0.5856, "step": 7939 }, { "epoch": 1.15, "grad_norm": 6.82442045211792, "learning_rate": 1.430539669298123e-06, "loss": 0.5951, "step": 7940 }, { "epoch": 1.15, "grad_norm": 7.01360559463501, "learning_rate": 1.4303968025349677e-06, "loss": 0.571, "step": 7941 }, { "epoch": 1.15, "grad_norm": 6.054470062255859, "learning_rate": 1.4302539249889205e-06, "loss": 0.6079, "step": 7942 }, { "epoch": 1.15, "grad_norm": 5.355623722076416, "learning_rate": 1.4301110366635598e-06, "loss": 0.4958, "step": 7943 }, { "epoch": 1.15, "grad_norm": 6.174736499786377, "learning_rate": 1.429968137562466e-06, "loss": 0.5307, "step": 7944 }, { "epoch": 1.15, "grad_norm": 6.336453914642334, "learning_rate": 1.429825227689219e-06, "loss": 0.6319, "step": 7945 }, { "epoch": 1.15, "grad_norm": 6.222607135772705, "learning_rate": 1.4296823070473995e-06, "loss": 0.5654, "step": 7946 }, { "epoch": 1.15, "grad_norm": 6.116804122924805, "learning_rate": 1.429539375640588e-06, "loss": 0.5208, "step": 7947 }, { "epoch": 1.15, "grad_norm": 6.201533317565918, "learning_rate": 1.4293964334723656e-06, "loss": 0.5683, "step": 7948 }, { "epoch": 1.15, "grad_norm": 6.680995941162109, "learning_rate": 1.429253480546313e-06, "loss": 0.5397, "step": 7949 }, { "epoch": 1.15, "grad_norm": 6.696208477020264, "learning_rate": 1.4291105168660118e-06, "loss": 0.5458, "step": 7950 }, { "epoch": 1.15, "grad_norm": 6.421475887298584, "learning_rate": 1.4289675424350437e-06, "loss": 0.5675, "step": 7951 }, { "epoch": 1.15, "grad_norm": 6.449297904968262, "learning_rate": 1.4288245572569913e-06, "loss": 0.5824, "step": 7952 }, { "epoch": 1.15, "grad_norm": 6.495330810546875, "learning_rate": 1.428681561335436e-06, "loss": 0.4914, "step": 7953 }, { "epoch": 1.15, "grad_norm": 6.915515422821045, "learning_rate": 1.4285385546739609e-06, "loss": 0.5082, "step": 7954 }, { "epoch": 1.15, "grad_norm": 6.085285186767578, "learning_rate": 1.4283955372761488e-06, "loss": 0.5082, "step": 7955 }, { "epoch": 1.15, "grad_norm": 5.927398204803467, "learning_rate": 1.4282525091455822e-06, "loss": 0.543, "step": 7956 }, { "epoch": 1.15, "grad_norm": 7.372576713562012, "learning_rate": 1.428109470285845e-06, "loss": 0.5607, "step": 7957 }, { "epoch": 1.15, "grad_norm": 6.0495405197143555, "learning_rate": 1.4279664207005207e-06, "loss": 0.4778, "step": 7958 }, { "epoch": 1.15, "grad_norm": 6.4066243171691895, "learning_rate": 1.4278233603931928e-06, "loss": 0.4764, "step": 7959 }, { "epoch": 1.15, "grad_norm": 7.089413166046143, "learning_rate": 1.427680289367446e-06, "loss": 0.5016, "step": 7960 }, { "epoch": 1.16, "grad_norm": 8.262371063232422, "learning_rate": 1.4275372076268644e-06, "loss": 0.7063, "step": 7961 }, { "epoch": 1.16, "grad_norm": 6.211212635040283, "learning_rate": 1.4273941151750328e-06, "loss": 0.5234, "step": 7962 }, { "epoch": 1.16, "grad_norm": 6.410481929779053, "learning_rate": 1.4272510120155361e-06, "loss": 0.548, "step": 7963 }, { "epoch": 1.16, "grad_norm": 6.782063007354736, "learning_rate": 1.4271078981519593e-06, "loss": 0.5696, "step": 7964 }, { "epoch": 1.16, "grad_norm": 5.509038925170898, "learning_rate": 1.4269647735878882e-06, "loss": 0.5252, "step": 7965 }, { "epoch": 1.16, "grad_norm": 5.658022880554199, "learning_rate": 1.4268216383269085e-06, "loss": 0.5567, "step": 7966 }, { "epoch": 1.16, "grad_norm": 5.642689228057861, "learning_rate": 1.4266784923726061e-06, "loss": 0.5106, "step": 7967 }, { "epoch": 1.16, "grad_norm": 6.636242389678955, "learning_rate": 1.4265353357285675e-06, "loss": 0.5855, "step": 7968 }, { "epoch": 1.16, "grad_norm": 6.359776020050049, "learning_rate": 1.4263921683983788e-06, "loss": 0.4441, "step": 7969 }, { "epoch": 1.16, "grad_norm": 7.327999591827393, "learning_rate": 1.4262489903856275e-06, "loss": 0.5159, "step": 7970 }, { "epoch": 1.16, "grad_norm": 6.328696250915527, "learning_rate": 1.4261058016938997e-06, "loss": 0.5424, "step": 7971 }, { "epoch": 1.16, "grad_norm": 6.11935567855835, "learning_rate": 1.425962602326784e-06, "loss": 0.5682, "step": 7972 }, { "epoch": 1.16, "grad_norm": 7.431000709533691, "learning_rate": 1.4258193922878671e-06, "loss": 0.5523, "step": 7973 }, { "epoch": 1.16, "grad_norm": 5.977790355682373, "learning_rate": 1.4256761715807375e-06, "loss": 0.5033, "step": 7974 }, { "epoch": 1.16, "grad_norm": 6.670629024505615, "learning_rate": 1.4255329402089831e-06, "loss": 0.5555, "step": 7975 }, { "epoch": 1.16, "grad_norm": 6.589574337005615, "learning_rate": 1.4253896981761922e-06, "loss": 0.5156, "step": 7976 }, { "epoch": 1.16, "grad_norm": 6.759585857391357, "learning_rate": 1.4252464454859533e-06, "loss": 0.6114, "step": 7977 }, { "epoch": 1.16, "grad_norm": 6.834707736968994, "learning_rate": 1.425103182141856e-06, "loss": 0.5021, "step": 7978 }, { "epoch": 1.16, "grad_norm": 7.064179420471191, "learning_rate": 1.424959908147489e-06, "loss": 0.5525, "step": 7979 }, { "epoch": 1.16, "grad_norm": 6.798471927642822, "learning_rate": 1.4248166235064422e-06, "loss": 0.5084, "step": 7980 }, { "epoch": 1.16, "grad_norm": 6.981337070465088, "learning_rate": 1.424673328222305e-06, "loss": 0.4474, "step": 7981 }, { "epoch": 1.16, "grad_norm": 6.241314888000488, "learning_rate": 1.4245300222986676e-06, "loss": 0.5438, "step": 7982 }, { "epoch": 1.16, "grad_norm": 6.168456077575684, "learning_rate": 1.4243867057391205e-06, "loss": 0.5377, "step": 7983 }, { "epoch": 1.16, "grad_norm": 6.8630051612854, "learning_rate": 1.4242433785472537e-06, "loss": 0.5481, "step": 7984 }, { "epoch": 1.16, "grad_norm": 7.110276699066162, "learning_rate": 1.4241000407266585e-06, "loss": 0.4911, "step": 7985 }, { "epoch": 1.16, "grad_norm": 6.736468315124512, "learning_rate": 1.4239566922809257e-06, "loss": 0.5083, "step": 7986 }, { "epoch": 1.16, "grad_norm": 6.165514945983887, "learning_rate": 1.4238133332136473e-06, "loss": 0.5068, "step": 7987 }, { "epoch": 1.16, "grad_norm": 6.893011569976807, "learning_rate": 1.4236699635284141e-06, "loss": 0.5387, "step": 7988 }, { "epoch": 1.16, "grad_norm": 6.69030237197876, "learning_rate": 1.4235265832288183e-06, "loss": 0.5602, "step": 7989 }, { "epoch": 1.16, "grad_norm": 5.861440658569336, "learning_rate": 1.4233831923184524e-06, "loss": 0.5495, "step": 7990 }, { "epoch": 1.16, "grad_norm": 6.438919544219971, "learning_rate": 1.4232397908009083e-06, "loss": 0.5421, "step": 7991 }, { "epoch": 1.16, "grad_norm": 5.572507858276367, "learning_rate": 1.423096378679779e-06, "loss": 0.5549, "step": 7992 }, { "epoch": 1.16, "grad_norm": 6.207709789276123, "learning_rate": 1.4229529559586576e-06, "loss": 0.5007, "step": 7993 }, { "epoch": 1.16, "grad_norm": 6.229541301727295, "learning_rate": 1.422809522641137e-06, "loss": 0.4831, "step": 7994 }, { "epoch": 1.16, "grad_norm": 7.187258720397949, "learning_rate": 1.4226660787308104e-06, "loss": 0.6162, "step": 7995 }, { "epoch": 1.16, "grad_norm": 6.923040390014648, "learning_rate": 1.4225226242312724e-06, "loss": 0.5292, "step": 7996 }, { "epoch": 1.16, "grad_norm": 6.4632768630981445, "learning_rate": 1.4223791591461166e-06, "loss": 0.5243, "step": 7997 }, { "epoch": 1.16, "grad_norm": 6.412832260131836, "learning_rate": 1.4222356834789369e-06, "loss": 0.5973, "step": 7998 }, { "epoch": 1.16, "grad_norm": 6.3780436515808105, "learning_rate": 1.4220921972333284e-06, "loss": 0.5291, "step": 7999 }, { "epoch": 1.16, "grad_norm": 7.258358478546143, "learning_rate": 1.421948700412886e-06, "loss": 0.5307, "step": 8000 }, { "epoch": 1.16, "grad_norm": 6.13813591003418, "learning_rate": 1.421805193021204e-06, "loss": 0.5287, "step": 8001 }, { "epoch": 1.16, "grad_norm": 6.724220275878906, "learning_rate": 1.4216616750618786e-06, "loss": 0.6124, "step": 8002 }, { "epoch": 1.16, "grad_norm": 6.566656112670898, "learning_rate": 1.4215181465385048e-06, "loss": 0.5518, "step": 8003 }, { "epoch": 1.16, "grad_norm": 6.776859760284424, "learning_rate": 1.421374607454679e-06, "loss": 0.6302, "step": 8004 }, { "epoch": 1.16, "grad_norm": 5.9694366455078125, "learning_rate": 1.4212310578139971e-06, "loss": 0.5683, "step": 8005 }, { "epoch": 1.16, "grad_norm": 6.00910758972168, "learning_rate": 1.4210874976200553e-06, "loss": 0.5185, "step": 8006 }, { "epoch": 1.16, "grad_norm": 6.0301618576049805, "learning_rate": 1.4209439268764505e-06, "loss": 0.5673, "step": 8007 }, { "epoch": 1.16, "grad_norm": 5.764768600463867, "learning_rate": 1.4208003455867796e-06, "loss": 0.4749, "step": 8008 }, { "epoch": 1.16, "grad_norm": 6.186665058135986, "learning_rate": 1.4206567537546396e-06, "loss": 0.5566, "step": 8009 }, { "epoch": 1.16, "grad_norm": 5.358029842376709, "learning_rate": 1.4205131513836284e-06, "loss": 0.5072, "step": 8010 }, { "epoch": 1.16, "grad_norm": 5.651952743530273, "learning_rate": 1.4203695384773435e-06, "loss": 0.5328, "step": 8011 }, { "epoch": 1.16, "grad_norm": 7.2940545082092285, "learning_rate": 1.4202259150393826e-06, "loss": 0.5602, "step": 8012 }, { "epoch": 1.16, "grad_norm": 6.335158824920654, "learning_rate": 1.4200822810733443e-06, "loss": 0.5287, "step": 8013 }, { "epoch": 1.16, "grad_norm": 6.290490627288818, "learning_rate": 1.419938636582827e-06, "loss": 0.5757, "step": 8014 }, { "epoch": 1.16, "grad_norm": 6.461133003234863, "learning_rate": 1.4197949815714294e-06, "loss": 0.6046, "step": 8015 }, { "epoch": 1.16, "grad_norm": 6.181288719177246, "learning_rate": 1.4196513160427505e-06, "loss": 0.5288, "step": 8016 }, { "epoch": 1.16, "grad_norm": 6.574042320251465, "learning_rate": 1.41950764000039e-06, "loss": 0.522, "step": 8017 }, { "epoch": 1.16, "grad_norm": 7.014410495758057, "learning_rate": 1.4193639534479474e-06, "loss": 0.5328, "step": 8018 }, { "epoch": 1.16, "grad_norm": 6.112696647644043, "learning_rate": 1.419220256389022e-06, "loss": 0.5323, "step": 8019 }, { "epoch": 1.16, "grad_norm": 6.699777126312256, "learning_rate": 1.4190765488272142e-06, "loss": 0.6326, "step": 8020 }, { "epoch": 1.16, "grad_norm": 6.521242618560791, "learning_rate": 1.4189328307661245e-06, "loss": 0.5712, "step": 8021 }, { "epoch": 1.16, "grad_norm": 5.492729187011719, "learning_rate": 1.4187891022093537e-06, "loss": 0.4234, "step": 8022 }, { "epoch": 1.16, "grad_norm": 5.930608749389648, "learning_rate": 1.4186453631605017e-06, "loss": 0.6027, "step": 8023 }, { "epoch": 1.16, "grad_norm": 5.762702941894531, "learning_rate": 1.418501613623171e-06, "loss": 0.487, "step": 8024 }, { "epoch": 1.16, "grad_norm": 6.390544414520264, "learning_rate": 1.4183578536009622e-06, "loss": 0.5403, "step": 8025 }, { "epoch": 1.16, "grad_norm": 6.869581699371338, "learning_rate": 1.4182140830974772e-06, "loss": 0.6075, "step": 8026 }, { "epoch": 1.16, "grad_norm": 6.1733527183532715, "learning_rate": 1.4180703021163176e-06, "loss": 0.4543, "step": 8027 }, { "epoch": 1.16, "grad_norm": 6.620699882507324, "learning_rate": 1.417926510661086e-06, "loss": 0.5485, "step": 8028 }, { "epoch": 1.16, "grad_norm": 6.893430709838867, "learning_rate": 1.4177827087353849e-06, "loss": 0.5656, "step": 8029 }, { "epoch": 1.17, "grad_norm": 6.197608947753906, "learning_rate": 1.4176388963428165e-06, "loss": 0.6518, "step": 8030 }, { "epoch": 1.17, "grad_norm": 6.178915023803711, "learning_rate": 1.4174950734869847e-06, "loss": 0.5385, "step": 8031 }, { "epoch": 1.17, "grad_norm": 7.4159135818481445, "learning_rate": 1.4173512401714917e-06, "loss": 0.6233, "step": 8032 }, { "epoch": 1.17, "grad_norm": 6.097822666168213, "learning_rate": 1.4172073963999417e-06, "loss": 0.5637, "step": 8033 }, { "epoch": 1.17, "grad_norm": 6.832413673400879, "learning_rate": 1.4170635421759383e-06, "loss": 0.5203, "step": 8034 }, { "epoch": 1.17, "grad_norm": 6.205141067504883, "learning_rate": 1.4169196775030853e-06, "loss": 0.5474, "step": 8035 }, { "epoch": 1.17, "grad_norm": 6.11722469329834, "learning_rate": 1.4167758023849873e-06, "loss": 0.5604, "step": 8036 }, { "epoch": 1.17, "grad_norm": 6.829136848449707, "learning_rate": 1.416631916825249e-06, "loss": 0.5897, "step": 8037 }, { "epoch": 1.17, "grad_norm": 5.856016159057617, "learning_rate": 1.4164880208274748e-06, "loss": 0.5626, "step": 8038 }, { "epoch": 1.17, "grad_norm": 6.351937294006348, "learning_rate": 1.41634411439527e-06, "loss": 0.5247, "step": 8039 }, { "epoch": 1.17, "grad_norm": 7.261159896850586, "learning_rate": 1.41620019753224e-06, "loss": 0.5818, "step": 8040 }, { "epoch": 1.17, "grad_norm": 7.0812668800354, "learning_rate": 1.41605627024199e-06, "loss": 0.6258, "step": 8041 }, { "epoch": 1.17, "grad_norm": 6.002397537231445, "learning_rate": 1.4159123325281264e-06, "loss": 0.5464, "step": 8042 }, { "epoch": 1.17, "grad_norm": 5.9366841316223145, "learning_rate": 1.415768384394255e-06, "loss": 0.4938, "step": 8043 }, { "epoch": 1.17, "grad_norm": 5.815208435058594, "learning_rate": 1.4156244258439822e-06, "loss": 0.486, "step": 8044 }, { "epoch": 1.17, "grad_norm": 5.974307537078857, "learning_rate": 1.415480456880915e-06, "loss": 0.5554, "step": 8045 }, { "epoch": 1.17, "grad_norm": 6.698685646057129, "learning_rate": 1.4153364775086601e-06, "loss": 0.5134, "step": 8046 }, { "epoch": 1.17, "grad_norm": 6.573446750640869, "learning_rate": 1.4151924877308242e-06, "loss": 0.5118, "step": 8047 }, { "epoch": 1.17, "grad_norm": 5.862283229827881, "learning_rate": 1.4150484875510153e-06, "loss": 0.5143, "step": 8048 }, { "epoch": 1.17, "grad_norm": 6.7733049392700195, "learning_rate": 1.4149044769728408e-06, "loss": 0.52, "step": 8049 }, { "epoch": 1.17, "grad_norm": 6.2656378746032715, "learning_rate": 1.4147604559999088e-06, "loss": 0.5496, "step": 8050 }, { "epoch": 1.17, "grad_norm": 7.149051666259766, "learning_rate": 1.4146164246358277e-06, "loss": 0.5441, "step": 8051 }, { "epoch": 1.17, "grad_norm": 5.6669921875, "learning_rate": 1.4144723828842058e-06, "loss": 0.4686, "step": 8052 }, { "epoch": 1.17, "grad_norm": 6.302104473114014, "learning_rate": 1.4143283307486518e-06, "loss": 0.5883, "step": 8053 }, { "epoch": 1.17, "grad_norm": 7.061257362365723, "learning_rate": 1.4141842682327742e-06, "loss": 0.6271, "step": 8054 }, { "epoch": 1.17, "grad_norm": 6.181976318359375, "learning_rate": 1.4140401953401832e-06, "loss": 0.5085, "step": 8055 }, { "epoch": 1.17, "grad_norm": 6.5193562507629395, "learning_rate": 1.4138961120744877e-06, "loss": 0.594, "step": 8056 }, { "epoch": 1.17, "grad_norm": 6.010401725769043, "learning_rate": 1.4137520184392977e-06, "loss": 0.5945, "step": 8057 }, { "epoch": 1.17, "grad_norm": 6.382508754730225, "learning_rate": 1.413607914438223e-06, "loss": 0.5743, "step": 8058 }, { "epoch": 1.17, "grad_norm": 6.786508083343506, "learning_rate": 1.4134638000748744e-06, "loss": 0.5649, "step": 8059 }, { "epoch": 1.17, "grad_norm": 7.560819149017334, "learning_rate": 1.413319675352862e-06, "loss": 0.5757, "step": 8060 }, { "epoch": 1.17, "grad_norm": 6.466067314147949, "learning_rate": 1.4131755402757965e-06, "loss": 0.5477, "step": 8061 }, { "epoch": 1.17, "grad_norm": 5.763129234313965, "learning_rate": 1.4130313948472892e-06, "loss": 0.5379, "step": 8062 }, { "epoch": 1.17, "grad_norm": 6.213130474090576, "learning_rate": 1.4128872390709513e-06, "loss": 0.5041, "step": 8063 }, { "epoch": 1.17, "grad_norm": 6.0905866622924805, "learning_rate": 1.412743072950395e-06, "loss": 0.5256, "step": 8064 }, { "epoch": 1.17, "grad_norm": 5.908262729644775, "learning_rate": 1.412598896489231e-06, "loss": 0.5141, "step": 8065 }, { "epoch": 1.17, "grad_norm": 5.656437873840332, "learning_rate": 1.4124547096910723e-06, "loss": 0.5639, "step": 8066 }, { "epoch": 1.17, "grad_norm": 6.819458961486816, "learning_rate": 1.4123105125595314e-06, "loss": 0.5984, "step": 8067 }, { "epoch": 1.17, "grad_norm": 5.548643589019775, "learning_rate": 1.4121663050982203e-06, "loss": 0.5559, "step": 8068 }, { "epoch": 1.17, "grad_norm": 6.0317864418029785, "learning_rate": 1.4120220873107518e-06, "loss": 0.5624, "step": 8069 }, { "epoch": 1.17, "grad_norm": 5.811639308929443, "learning_rate": 1.4118778592007396e-06, "loss": 0.5276, "step": 8070 }, { "epoch": 1.17, "grad_norm": 6.8440375328063965, "learning_rate": 1.4117336207717969e-06, "loss": 0.542, "step": 8071 }, { "epoch": 1.17, "grad_norm": 6.086575984954834, "learning_rate": 1.4115893720275374e-06, "loss": 0.5547, "step": 8072 }, { "epoch": 1.17, "grad_norm": 6.722485065460205, "learning_rate": 1.4114451129715749e-06, "loss": 0.5393, "step": 8073 }, { "epoch": 1.17, "grad_norm": 6.15450382232666, "learning_rate": 1.4113008436075235e-06, "loss": 0.5755, "step": 8074 }, { "epoch": 1.17, "grad_norm": 6.343529224395752, "learning_rate": 1.4111565639389977e-06, "loss": 0.5068, "step": 8075 }, { "epoch": 1.17, "grad_norm": 6.799323081970215, "learning_rate": 1.4110122739696122e-06, "loss": 0.5949, "step": 8076 }, { "epoch": 1.17, "grad_norm": 6.729673862457275, "learning_rate": 1.4108679737029822e-06, "loss": 0.5066, "step": 8077 }, { "epoch": 1.17, "grad_norm": 7.008426189422607, "learning_rate": 1.4107236631427228e-06, "loss": 0.5929, "step": 8078 }, { "epoch": 1.17, "grad_norm": 6.765337944030762, "learning_rate": 1.4105793422924493e-06, "loss": 0.554, "step": 8079 }, { "epoch": 1.17, "grad_norm": 6.202638626098633, "learning_rate": 1.4104350111557775e-06, "loss": 0.5061, "step": 8080 }, { "epoch": 1.17, "grad_norm": 6.575694561004639, "learning_rate": 1.4102906697363232e-06, "loss": 0.4768, "step": 8081 }, { "epoch": 1.17, "grad_norm": 6.297030925750732, "learning_rate": 1.410146318037703e-06, "loss": 0.5593, "step": 8082 }, { "epoch": 1.17, "grad_norm": 5.733243465423584, "learning_rate": 1.4100019560635331e-06, "loss": 0.4753, "step": 8083 }, { "epoch": 1.17, "grad_norm": 6.427083492279053, "learning_rate": 1.4098575838174304e-06, "loss": 0.5567, "step": 8084 }, { "epoch": 1.17, "grad_norm": 6.421254634857178, "learning_rate": 1.4097132013030122e-06, "loss": 0.5254, "step": 8085 }, { "epoch": 1.17, "grad_norm": 6.9527130126953125, "learning_rate": 1.4095688085238951e-06, "loss": 0.606, "step": 8086 }, { "epoch": 1.17, "grad_norm": 6.67804479598999, "learning_rate": 1.4094244054836972e-06, "loss": 0.5358, "step": 8087 }, { "epoch": 1.17, "grad_norm": 7.775659561157227, "learning_rate": 1.409279992186036e-06, "loss": 0.6365, "step": 8088 }, { "epoch": 1.17, "grad_norm": 6.676171779632568, "learning_rate": 1.4091355686345296e-06, "loss": 0.5248, "step": 8089 }, { "epoch": 1.17, "grad_norm": 5.804840564727783, "learning_rate": 1.4089911348327962e-06, "loss": 0.5214, "step": 8090 }, { "epoch": 1.17, "grad_norm": 6.811270713806152, "learning_rate": 1.4088466907844545e-06, "loss": 0.5386, "step": 8091 }, { "epoch": 1.17, "grad_norm": 6.3695549964904785, "learning_rate": 1.4087022364931231e-06, "loss": 0.5533, "step": 8092 }, { "epoch": 1.17, "grad_norm": 6.94586181640625, "learning_rate": 1.4085577719624215e-06, "loss": 0.5635, "step": 8093 }, { "epoch": 1.17, "grad_norm": 7.451143264770508, "learning_rate": 1.4084132971959685e-06, "loss": 0.6101, "step": 8094 }, { "epoch": 1.17, "grad_norm": 6.52240514755249, "learning_rate": 1.4082688121973844e-06, "loss": 0.5594, "step": 8095 }, { "epoch": 1.17, "grad_norm": 6.577210903167725, "learning_rate": 1.4081243169702883e-06, "loss": 0.5119, "step": 8096 }, { "epoch": 1.17, "grad_norm": 5.475601673126221, "learning_rate": 1.4079798115183006e-06, "loss": 0.4848, "step": 8097 }, { "epoch": 1.17, "grad_norm": 6.164546012878418, "learning_rate": 1.4078352958450417e-06, "loss": 0.5345, "step": 8098 }, { "epoch": 1.18, "grad_norm": 7.784965991973877, "learning_rate": 1.4076907699541318e-06, "loss": 0.5912, "step": 8099 }, { "epoch": 1.18, "grad_norm": 6.213583946228027, "learning_rate": 1.4075462338491924e-06, "loss": 0.5349, "step": 8100 }, { "epoch": 1.18, "grad_norm": 7.527630805969238, "learning_rate": 1.4074016875338442e-06, "loss": 0.568, "step": 8101 }, { "epoch": 1.18, "grad_norm": 5.8166351318359375, "learning_rate": 1.4072571310117084e-06, "loss": 0.5322, "step": 8102 }, { "epoch": 1.18, "grad_norm": 6.9249396324157715, "learning_rate": 1.4071125642864076e-06, "loss": 0.5628, "step": 8103 }, { "epoch": 1.18, "grad_norm": 6.394449710845947, "learning_rate": 1.4069679873615622e-06, "loss": 0.5729, "step": 8104 }, { "epoch": 1.18, "grad_norm": 6.135803699493408, "learning_rate": 1.4068234002407958e-06, "loss": 0.5235, "step": 8105 }, { "epoch": 1.18, "grad_norm": 6.317669868469238, "learning_rate": 1.4066788029277298e-06, "loss": 0.5238, "step": 8106 }, { "epoch": 1.18, "grad_norm": 6.254926681518555, "learning_rate": 1.4065341954259871e-06, "loss": 0.4863, "step": 8107 }, { "epoch": 1.18, "grad_norm": 6.532647132873535, "learning_rate": 1.406389577739191e-06, "loss": 0.5353, "step": 8108 }, { "epoch": 1.18, "grad_norm": 6.262001037597656, "learning_rate": 1.406244949870964e-06, "loss": 0.5666, "step": 8109 }, { "epoch": 1.18, "grad_norm": 6.216604709625244, "learning_rate": 1.40610031182493e-06, "loss": 0.46, "step": 8110 }, { "epoch": 1.18, "grad_norm": 7.040116310119629, "learning_rate": 1.4059556636047125e-06, "loss": 0.5352, "step": 8111 }, { "epoch": 1.18, "grad_norm": 6.035040855407715, "learning_rate": 1.4058110052139353e-06, "loss": 0.5467, "step": 8112 }, { "epoch": 1.18, "grad_norm": 6.537341117858887, "learning_rate": 1.405666336656223e-06, "loss": 0.5527, "step": 8113 }, { "epoch": 1.18, "grad_norm": 5.579728603363037, "learning_rate": 1.4055216579351998e-06, "loss": 0.4704, "step": 8114 }, { "epoch": 1.18, "grad_norm": 6.457627773284912, "learning_rate": 1.4053769690544904e-06, "loss": 0.5885, "step": 8115 }, { "epoch": 1.18, "grad_norm": 6.395876407623291, "learning_rate": 1.4052322700177196e-06, "loss": 0.5408, "step": 8116 }, { "epoch": 1.18, "grad_norm": 6.412521839141846, "learning_rate": 1.4050875608285124e-06, "loss": 0.5653, "step": 8117 }, { "epoch": 1.18, "grad_norm": 6.515058994293213, "learning_rate": 1.404942841490495e-06, "loss": 0.5875, "step": 8118 }, { "epoch": 1.18, "grad_norm": 6.537639141082764, "learning_rate": 1.4047981120072925e-06, "loss": 0.5849, "step": 8119 }, { "epoch": 1.18, "grad_norm": 6.289422035217285, "learning_rate": 1.4046533723825308e-06, "loss": 0.5445, "step": 8120 }, { "epoch": 1.18, "grad_norm": 5.785762310028076, "learning_rate": 1.404508622619837e-06, "loss": 0.6021, "step": 8121 }, { "epoch": 1.18, "grad_norm": 6.105376243591309, "learning_rate": 1.4043638627228361e-06, "loss": 0.5017, "step": 8122 }, { "epoch": 1.18, "grad_norm": 6.787084579467773, "learning_rate": 1.4042190926951562e-06, "loss": 0.5439, "step": 8123 }, { "epoch": 1.18, "grad_norm": 6.781733512878418, "learning_rate": 1.4040743125404238e-06, "loss": 0.5058, "step": 8124 }, { "epoch": 1.18, "grad_norm": 6.04825496673584, "learning_rate": 1.4039295222622655e-06, "loss": 0.4601, "step": 8125 }, { "epoch": 1.18, "grad_norm": 5.566313743591309, "learning_rate": 1.4037847218643098e-06, "loss": 0.5234, "step": 8126 }, { "epoch": 1.18, "grad_norm": 6.719681262969971, "learning_rate": 1.4036399113501838e-06, "loss": 0.5838, "step": 8127 }, { "epoch": 1.18, "grad_norm": 6.9860520362854, "learning_rate": 1.4034950907235156e-06, "loss": 0.5461, "step": 8128 }, { "epoch": 1.18, "grad_norm": 7.196279048919678, "learning_rate": 1.4033502599879336e-06, "loss": 0.5851, "step": 8129 }, { "epoch": 1.18, "grad_norm": 5.978280067443848, "learning_rate": 1.4032054191470661e-06, "loss": 0.5055, "step": 8130 }, { "epoch": 1.18, "grad_norm": 6.137669563293457, "learning_rate": 1.4030605682045422e-06, "loss": 0.5149, "step": 8131 }, { "epoch": 1.18, "grad_norm": 6.720118045806885, "learning_rate": 1.4029157071639905e-06, "loss": 0.6469, "step": 8132 }, { "epoch": 1.18, "grad_norm": 5.905937194824219, "learning_rate": 1.4027708360290405e-06, "loss": 0.5439, "step": 8133 }, { "epoch": 1.18, "grad_norm": 7.491723537445068, "learning_rate": 1.4026259548033214e-06, "loss": 0.5799, "step": 8134 }, { "epoch": 1.18, "grad_norm": 6.084851264953613, "learning_rate": 1.4024810634904636e-06, "loss": 0.5197, "step": 8135 }, { "epoch": 1.18, "grad_norm": 6.242819309234619, "learning_rate": 1.4023361620940967e-06, "loss": 0.5681, "step": 8136 }, { "epoch": 1.18, "grad_norm": 5.656228065490723, "learning_rate": 1.4021912506178507e-06, "loss": 0.517, "step": 8137 }, { "epoch": 1.18, "grad_norm": 7.113826274871826, "learning_rate": 1.4020463290653565e-06, "loss": 0.5854, "step": 8138 }, { "epoch": 1.18, "grad_norm": 6.2602009773254395, "learning_rate": 1.4019013974402447e-06, "loss": 0.5066, "step": 8139 }, { "epoch": 1.18, "grad_norm": 7.024199962615967, "learning_rate": 1.4017564557461466e-06, "loss": 0.6794, "step": 8140 }, { "epoch": 1.18, "grad_norm": 6.547749996185303, "learning_rate": 1.4016115039866932e-06, "loss": 0.5741, "step": 8141 }, { "epoch": 1.18, "grad_norm": 5.802624225616455, "learning_rate": 1.401466542165516e-06, "loss": 0.5836, "step": 8142 }, { "epoch": 1.18, "grad_norm": 6.40382719039917, "learning_rate": 1.4013215702862473e-06, "loss": 0.5566, "step": 8143 }, { "epoch": 1.18, "grad_norm": 5.827211856842041, "learning_rate": 1.4011765883525184e-06, "loss": 0.5355, "step": 8144 }, { "epoch": 1.18, "grad_norm": 6.714227199554443, "learning_rate": 1.401031596367962e-06, "loss": 0.5151, "step": 8145 }, { "epoch": 1.18, "grad_norm": 6.205801010131836, "learning_rate": 1.4008865943362105e-06, "loss": 0.5374, "step": 8146 }, { "epoch": 1.18, "grad_norm": 6.154549598693848, "learning_rate": 1.4007415822608968e-06, "loss": 0.5266, "step": 8147 }, { "epoch": 1.18, "grad_norm": 6.964009761810303, "learning_rate": 1.4005965601456537e-06, "loss": 0.5784, "step": 8148 }, { "epoch": 1.18, "grad_norm": 6.744359016418457, "learning_rate": 1.400451527994115e-06, "loss": 0.6087, "step": 8149 }, { "epoch": 1.18, "grad_norm": 6.512462615966797, "learning_rate": 1.4003064858099141e-06, "loss": 0.5609, "step": 8150 }, { "epoch": 1.18, "grad_norm": 5.758487701416016, "learning_rate": 1.400161433596684e-06, "loss": 0.4955, "step": 8151 }, { "epoch": 1.18, "grad_norm": 6.3351521492004395, "learning_rate": 1.4000163713580598e-06, "loss": 0.5476, "step": 8152 }, { "epoch": 1.18, "grad_norm": 6.867452144622803, "learning_rate": 1.3998712990976753e-06, "loss": 0.5421, "step": 8153 }, { "epoch": 1.18, "grad_norm": 6.997506618499756, "learning_rate": 1.3997262168191649e-06, "loss": 0.6222, "step": 8154 }, { "epoch": 1.18, "grad_norm": 6.19938850402832, "learning_rate": 1.3995811245261638e-06, "loss": 0.539, "step": 8155 }, { "epoch": 1.18, "grad_norm": 6.360269546508789, "learning_rate": 1.399436022222307e-06, "loss": 0.5695, "step": 8156 }, { "epoch": 1.18, "grad_norm": 6.086359977722168, "learning_rate": 1.3992909099112296e-06, "loss": 0.5275, "step": 8157 }, { "epoch": 1.18, "grad_norm": 6.13277006149292, "learning_rate": 1.399145787596567e-06, "loss": 0.4841, "step": 8158 }, { "epoch": 1.18, "grad_norm": 6.15534782409668, "learning_rate": 1.3990006552819555e-06, "loss": 0.5409, "step": 8159 }, { "epoch": 1.18, "grad_norm": 6.711808204650879, "learning_rate": 1.3988555129710308e-06, "loss": 0.616, "step": 8160 }, { "epoch": 1.18, "grad_norm": 6.130260944366455, "learning_rate": 1.3987103606674295e-06, "loss": 0.5282, "step": 8161 }, { "epoch": 1.18, "grad_norm": 6.5461015701293945, "learning_rate": 1.3985651983747877e-06, "loss": 0.4952, "step": 8162 }, { "epoch": 1.18, "grad_norm": 7.0577392578125, "learning_rate": 1.3984200260967424e-06, "loss": 0.4416, "step": 8163 }, { "epoch": 1.18, "grad_norm": 6.4411821365356445, "learning_rate": 1.3982748438369312e-06, "loss": 0.6396, "step": 8164 }, { "epoch": 1.18, "grad_norm": 6.964411735534668, "learning_rate": 1.3981296515989905e-06, "loss": 0.5687, "step": 8165 }, { "epoch": 1.18, "grad_norm": 5.940643310546875, "learning_rate": 1.3979844493865583e-06, "loss": 0.5365, "step": 8166 }, { "epoch": 1.18, "grad_norm": 6.585288047790527, "learning_rate": 1.3978392372032724e-06, "loss": 0.6212, "step": 8167 }, { "epoch": 1.19, "grad_norm": 6.591354846954346, "learning_rate": 1.3976940150527708e-06, "loss": 0.5805, "step": 8168 }, { "epoch": 1.19, "grad_norm": 5.700319290161133, "learning_rate": 1.3975487829386918e-06, "loss": 0.5059, "step": 8169 }, { "epoch": 1.19, "grad_norm": 6.948652744293213, "learning_rate": 1.397403540864674e-06, "loss": 0.5689, "step": 8170 }, { "epoch": 1.19, "grad_norm": 5.653316497802734, "learning_rate": 1.3972582888343565e-06, "loss": 0.545, "step": 8171 }, { "epoch": 1.19, "grad_norm": 6.379695415496826, "learning_rate": 1.3971130268513778e-06, "loss": 0.5401, "step": 8172 }, { "epoch": 1.19, "grad_norm": 6.273723125457764, "learning_rate": 1.3969677549193774e-06, "loss": 0.5408, "step": 8173 }, { "epoch": 1.19, "grad_norm": 6.143279552459717, "learning_rate": 1.396822473041995e-06, "loss": 0.5721, "step": 8174 }, { "epoch": 1.19, "grad_norm": 6.351095676422119, "learning_rate": 1.3966771812228703e-06, "loss": 0.5231, "step": 8175 }, { "epoch": 1.19, "grad_norm": 6.377164363861084, "learning_rate": 1.3965318794656432e-06, "loss": 0.5668, "step": 8176 }, { "epoch": 1.19, "grad_norm": 6.373739242553711, "learning_rate": 1.3963865677739543e-06, "loss": 0.5698, "step": 8177 }, { "epoch": 1.19, "grad_norm": 6.392817497253418, "learning_rate": 1.3962412461514444e-06, "loss": 0.5001, "step": 8178 }, { "epoch": 1.19, "grad_norm": 5.699767112731934, "learning_rate": 1.3960959146017532e-06, "loss": 0.5924, "step": 8179 }, { "epoch": 1.19, "grad_norm": 6.655535697937012, "learning_rate": 1.3959505731285229e-06, "loss": 0.5691, "step": 8180 }, { "epoch": 1.19, "grad_norm": 6.117150783538818, "learning_rate": 1.395805221735394e-06, "loss": 0.5504, "step": 8181 }, { "epoch": 1.19, "grad_norm": 5.488027095794678, "learning_rate": 1.3956598604260086e-06, "loss": 0.4825, "step": 8182 }, { "epoch": 1.19, "grad_norm": 6.180591583251953, "learning_rate": 1.3955144892040083e-06, "loss": 0.4681, "step": 8183 }, { "epoch": 1.19, "grad_norm": 6.576677322387695, "learning_rate": 1.395369108073035e-06, "loss": 0.5621, "step": 8184 }, { "epoch": 1.19, "grad_norm": 6.930161476135254, "learning_rate": 1.3952237170367313e-06, "loss": 0.6044, "step": 8185 }, { "epoch": 1.19, "grad_norm": 7.549661159515381, "learning_rate": 1.3950783160987394e-06, "loss": 0.4787, "step": 8186 }, { "epoch": 1.19, "grad_norm": 7.328894138336182, "learning_rate": 1.3949329052627022e-06, "loss": 0.6489, "step": 8187 }, { "epoch": 1.19, "grad_norm": 5.633615970611572, "learning_rate": 1.3947874845322627e-06, "loss": 0.4877, "step": 8188 }, { "epoch": 1.19, "grad_norm": 5.82802677154541, "learning_rate": 1.3946420539110643e-06, "loss": 0.6001, "step": 8189 }, { "epoch": 1.19, "grad_norm": 5.962538242340088, "learning_rate": 1.3944966134027505e-06, "loss": 0.5522, "step": 8190 }, { "epoch": 1.19, "grad_norm": 6.725261211395264, "learning_rate": 1.3943511630109652e-06, "loss": 0.5982, "step": 8191 }, { "epoch": 1.19, "grad_norm": 6.618692398071289, "learning_rate": 1.394205702739352e-06, "loss": 0.48, "step": 8192 }, { "epoch": 1.19, "grad_norm": 7.296270847320557, "learning_rate": 1.3940602325915558e-06, "loss": 0.4892, "step": 8193 }, { "epoch": 1.19, "grad_norm": 6.272547721862793, "learning_rate": 1.3939147525712204e-06, "loss": 0.4561, "step": 8194 }, { "epoch": 1.19, "grad_norm": 6.303041458129883, "learning_rate": 1.3937692626819911e-06, "loss": 0.5503, "step": 8195 }, { "epoch": 1.19, "grad_norm": 8.103352546691895, "learning_rate": 1.3936237629275124e-06, "loss": 0.5631, "step": 8196 }, { "epoch": 1.19, "grad_norm": 6.844870090484619, "learning_rate": 1.3934782533114302e-06, "loss": 0.5354, "step": 8197 }, { "epoch": 1.19, "grad_norm": 6.293679714202881, "learning_rate": 1.3933327338373896e-06, "loss": 0.5205, "step": 8198 }, { "epoch": 1.19, "grad_norm": 5.906081199645996, "learning_rate": 1.3931872045090368e-06, "loss": 0.6078, "step": 8199 }, { "epoch": 1.19, "grad_norm": 6.684195518493652, "learning_rate": 1.393041665330017e-06, "loss": 0.5477, "step": 8200 }, { "epoch": 1.19, "grad_norm": 6.942782878875732, "learning_rate": 1.392896116303977e-06, "loss": 0.5927, "step": 8201 }, { "epoch": 1.19, "grad_norm": 6.343011379241943, "learning_rate": 1.3927505574345632e-06, "loss": 0.5297, "step": 8202 }, { "epoch": 1.19, "grad_norm": 6.547997951507568, "learning_rate": 1.3926049887254222e-06, "loss": 0.559, "step": 8203 }, { "epoch": 1.19, "grad_norm": 5.833698749542236, "learning_rate": 1.3924594101802013e-06, "loss": 0.5112, "step": 8204 }, { "epoch": 1.19, "grad_norm": 6.51095724105835, "learning_rate": 1.3923138218025477e-06, "loss": 0.5262, "step": 8205 }, { "epoch": 1.19, "grad_norm": 6.061232089996338, "learning_rate": 1.3921682235961086e-06, "loss": 0.5, "step": 8206 }, { "epoch": 1.19, "grad_norm": 6.4909796714782715, "learning_rate": 1.3920226155645317e-06, "loss": 0.5273, "step": 8207 }, { "epoch": 1.19, "grad_norm": 6.206329822540283, "learning_rate": 1.3918769977114652e-06, "loss": 0.553, "step": 8208 }, { "epoch": 1.19, "grad_norm": 6.415735721588135, "learning_rate": 1.3917313700405574e-06, "loss": 0.5279, "step": 8209 }, { "epoch": 1.19, "grad_norm": 6.578516006469727, "learning_rate": 1.3915857325554567e-06, "loss": 0.5504, "step": 8210 }, { "epoch": 1.19, "grad_norm": 7.027426242828369, "learning_rate": 1.3914400852598114e-06, "loss": 0.616, "step": 8211 }, { "epoch": 1.19, "grad_norm": 6.455967426300049, "learning_rate": 1.3912944281572712e-06, "loss": 0.6061, "step": 8212 }, { "epoch": 1.19, "grad_norm": 7.167068004608154, "learning_rate": 1.3911487612514848e-06, "loss": 0.5847, "step": 8213 }, { "epoch": 1.19, "grad_norm": 7.130336761474609, "learning_rate": 1.3910030845461017e-06, "loss": 0.6452, "step": 8214 }, { "epoch": 1.19, "grad_norm": 6.4215850830078125, "learning_rate": 1.3908573980447713e-06, "loss": 0.497, "step": 8215 }, { "epoch": 1.19, "grad_norm": 7.378108501434326, "learning_rate": 1.390711701751144e-06, "loss": 0.5774, "step": 8216 }, { "epoch": 1.19, "grad_norm": 6.3931803703308105, "learning_rate": 1.39056599566887e-06, "loss": 0.5151, "step": 8217 }, { "epoch": 1.19, "grad_norm": 5.97069787979126, "learning_rate": 1.3904202798015996e-06, "loss": 0.5894, "step": 8218 }, { "epoch": 1.19, "grad_norm": 5.592534065246582, "learning_rate": 1.3902745541529838e-06, "loss": 0.4882, "step": 8219 }, { "epoch": 1.19, "grad_norm": 6.99402379989624, "learning_rate": 1.3901288187266725e-06, "loss": 0.5803, "step": 8220 }, { "epoch": 1.19, "grad_norm": 5.9984211921691895, "learning_rate": 1.3899830735263178e-06, "loss": 0.5136, "step": 8221 }, { "epoch": 1.19, "grad_norm": 6.885747909545898, "learning_rate": 1.3898373185555707e-06, "loss": 0.6483, "step": 8222 }, { "epoch": 1.19, "grad_norm": 6.213878154754639, "learning_rate": 1.3896915538180832e-06, "loss": 0.4783, "step": 8223 }, { "epoch": 1.19, "grad_norm": 6.24242639541626, "learning_rate": 1.3895457793175068e-06, "loss": 0.597, "step": 8224 }, { "epoch": 1.19, "grad_norm": 7.247714519500732, "learning_rate": 1.3893999950574941e-06, "loss": 0.7083, "step": 8225 }, { "epoch": 1.19, "grad_norm": 6.321686744689941, "learning_rate": 1.3892542010416973e-06, "loss": 0.5418, "step": 8226 }, { "epoch": 1.19, "grad_norm": 6.842547416687012, "learning_rate": 1.389108397273769e-06, "loss": 0.5584, "step": 8227 }, { "epoch": 1.19, "grad_norm": 7.432909965515137, "learning_rate": 1.3889625837573617e-06, "loss": 0.6199, "step": 8228 }, { "epoch": 1.19, "grad_norm": 6.871634483337402, "learning_rate": 1.388816760496129e-06, "loss": 0.5602, "step": 8229 }, { "epoch": 1.19, "grad_norm": 6.032170295715332, "learning_rate": 1.3886709274937243e-06, "loss": 0.5295, "step": 8230 }, { "epoch": 1.19, "grad_norm": 6.214892864227295, "learning_rate": 1.3885250847538007e-06, "loss": 0.5681, "step": 8231 }, { "epoch": 1.19, "grad_norm": 6.680408954620361, "learning_rate": 1.3883792322800123e-06, "loss": 0.5961, "step": 8232 }, { "epoch": 1.19, "grad_norm": 6.199265480041504, "learning_rate": 1.3882333700760138e-06, "loss": 0.481, "step": 8233 }, { "epoch": 1.19, "grad_norm": 6.080591201782227, "learning_rate": 1.3880874981454587e-06, "loss": 0.4863, "step": 8234 }, { "epoch": 1.19, "grad_norm": 6.49401330947876, "learning_rate": 1.387941616492002e-06, "loss": 0.5298, "step": 8235 }, { "epoch": 1.2, "grad_norm": 5.811765670776367, "learning_rate": 1.3877957251192984e-06, "loss": 0.5691, "step": 8236 }, { "epoch": 1.2, "grad_norm": 6.524777412414551, "learning_rate": 1.387649824031003e-06, "loss": 0.5309, "step": 8237 }, { "epoch": 1.2, "grad_norm": 5.607990741729736, "learning_rate": 1.3875039132307711e-06, "loss": 0.4575, "step": 8238 }, { "epoch": 1.2, "grad_norm": 6.3850789070129395, "learning_rate": 1.3873579927222583e-06, "loss": 0.5753, "step": 8239 }, { "epoch": 1.2, "grad_norm": 6.330826759338379, "learning_rate": 1.3872120625091205e-06, "loss": 0.5419, "step": 8240 }, { "epoch": 1.2, "grad_norm": 6.552235126495361, "learning_rate": 1.3870661225950135e-06, "loss": 0.5361, "step": 8241 }, { "epoch": 1.2, "grad_norm": 6.282201766967773, "learning_rate": 1.386920172983594e-06, "loss": 0.5537, "step": 8242 }, { "epoch": 1.2, "grad_norm": 6.264284133911133, "learning_rate": 1.3867742136785178e-06, "loss": 0.6346, "step": 8243 }, { "epoch": 1.2, "grad_norm": 6.3090739250183105, "learning_rate": 1.3866282446834423e-06, "loss": 0.504, "step": 8244 }, { "epoch": 1.2, "grad_norm": 6.130226135253906, "learning_rate": 1.3864822660020244e-06, "loss": 0.5426, "step": 8245 }, { "epoch": 1.2, "grad_norm": 9.082615852355957, "learning_rate": 1.3863362776379212e-06, "loss": 0.5257, "step": 8246 }, { "epoch": 1.2, "grad_norm": 7.02134370803833, "learning_rate": 1.3861902795947907e-06, "loss": 0.4989, "step": 8247 }, { "epoch": 1.2, "grad_norm": 6.864261150360107, "learning_rate": 1.3860442718762896e-06, "loss": 0.5263, "step": 8248 }, { "epoch": 1.2, "grad_norm": 6.347015857696533, "learning_rate": 1.3858982544860768e-06, "loss": 0.5063, "step": 8249 }, { "epoch": 1.2, "grad_norm": 6.372990131378174, "learning_rate": 1.3857522274278105e-06, "loss": 0.5057, "step": 8250 }, { "epoch": 1.2, "grad_norm": 5.99852180480957, "learning_rate": 1.3856061907051487e-06, "loss": 0.5584, "step": 8251 }, { "epoch": 1.2, "grad_norm": 6.433342456817627, "learning_rate": 1.38546014432175e-06, "loss": 0.5694, "step": 8252 }, { "epoch": 1.2, "grad_norm": 6.028768062591553, "learning_rate": 1.3853140882812742e-06, "loss": 0.517, "step": 8253 }, { "epoch": 1.2, "grad_norm": 6.335208415985107, "learning_rate": 1.3851680225873796e-06, "loss": 0.5067, "step": 8254 }, { "epoch": 1.2, "grad_norm": 5.590322494506836, "learning_rate": 1.3850219472437265e-06, "loss": 0.5516, "step": 8255 }, { "epoch": 1.2, "grad_norm": 5.979964256286621, "learning_rate": 1.3848758622539737e-06, "loss": 0.5003, "step": 8256 }, { "epoch": 1.2, "grad_norm": 6.14253044128418, "learning_rate": 1.3847297676217818e-06, "loss": 0.6098, "step": 8257 }, { "epoch": 1.2, "grad_norm": 6.2331061363220215, "learning_rate": 1.3845836633508103e-06, "loss": 0.5181, "step": 8258 }, { "epoch": 1.2, "grad_norm": 5.828697681427002, "learning_rate": 1.38443754944472e-06, "loss": 0.5355, "step": 8259 }, { "epoch": 1.2, "grad_norm": 5.69281530380249, "learning_rate": 1.384291425907172e-06, "loss": 0.4869, "step": 8260 }, { "epoch": 1.2, "grad_norm": 6.374936580657959, "learning_rate": 1.3841452927418268e-06, "loss": 0.5852, "step": 8261 }, { "epoch": 1.2, "grad_norm": 7.0432047843933105, "learning_rate": 1.383999149952345e-06, "loss": 0.5206, "step": 8262 }, { "epoch": 1.2, "grad_norm": 5.808401107788086, "learning_rate": 1.3838529975423887e-06, "loss": 0.5067, "step": 8263 }, { "epoch": 1.2, "grad_norm": 5.847195625305176, "learning_rate": 1.383706835515619e-06, "loss": 0.579, "step": 8264 }, { "epoch": 1.2, "grad_norm": 7.304813385009766, "learning_rate": 1.3835606638756981e-06, "loss": 0.5929, "step": 8265 }, { "epoch": 1.2, "grad_norm": 7.348912239074707, "learning_rate": 1.383414482626288e-06, "loss": 0.6356, "step": 8266 }, { "epoch": 1.2, "grad_norm": 6.131041049957275, "learning_rate": 1.383268291771051e-06, "loss": 0.5371, "step": 8267 }, { "epoch": 1.2, "grad_norm": 6.3786516189575195, "learning_rate": 1.38312209131365e-06, "loss": 0.5304, "step": 8268 }, { "epoch": 1.2, "grad_norm": 6.251201629638672, "learning_rate": 1.3829758812577473e-06, "loss": 0.5513, "step": 8269 }, { "epoch": 1.2, "grad_norm": 5.870251655578613, "learning_rate": 1.382829661607006e-06, "loss": 0.5156, "step": 8270 }, { "epoch": 1.2, "grad_norm": 6.4502973556518555, "learning_rate": 1.3826834323650898e-06, "loss": 0.564, "step": 8271 }, { "epoch": 1.2, "grad_norm": 7.533594608306885, "learning_rate": 1.3825371935356618e-06, "loss": 0.5328, "step": 8272 }, { "epoch": 1.2, "grad_norm": 6.199687480926514, "learning_rate": 1.382390945122386e-06, "loss": 0.56, "step": 8273 }, { "epoch": 1.2, "grad_norm": 7.662558078765869, "learning_rate": 1.3822446871289265e-06, "loss": 0.5929, "step": 8274 }, { "epoch": 1.2, "grad_norm": 6.6535186767578125, "learning_rate": 1.3820984195589477e-06, "loss": 0.5154, "step": 8275 }, { "epoch": 1.2, "grad_norm": 6.565281867980957, "learning_rate": 1.3819521424161133e-06, "loss": 0.5798, "step": 8276 }, { "epoch": 1.2, "grad_norm": 6.538479328155518, "learning_rate": 1.3818058557040887e-06, "loss": 0.4955, "step": 8277 }, { "epoch": 1.2, "grad_norm": 5.963287353515625, "learning_rate": 1.381659559426539e-06, "loss": 0.5319, "step": 8278 }, { "epoch": 1.2, "grad_norm": 5.854086875915527, "learning_rate": 1.381513253587129e-06, "loss": 0.5696, "step": 8279 }, { "epoch": 1.2, "grad_norm": 6.205610752105713, "learning_rate": 1.3813669381895244e-06, "loss": 0.4398, "step": 8280 }, { "epoch": 1.2, "grad_norm": 6.732860088348389, "learning_rate": 1.3812206132373906e-06, "loss": 0.5215, "step": 8281 }, { "epoch": 1.2, "grad_norm": 6.585350036621094, "learning_rate": 1.3810742787343942e-06, "loss": 0.4873, "step": 8282 }, { "epoch": 1.2, "grad_norm": 6.5972514152526855, "learning_rate": 1.380927934684201e-06, "loss": 0.5625, "step": 8283 }, { "epoch": 1.2, "grad_norm": 6.602931499481201, "learning_rate": 1.3807815810904769e-06, "loss": 0.5482, "step": 8284 }, { "epoch": 1.2, "grad_norm": 7.329185962677002, "learning_rate": 1.3806352179568888e-06, "loss": 0.5978, "step": 8285 }, { "epoch": 1.2, "grad_norm": 6.738537788391113, "learning_rate": 1.380488845287104e-06, "loss": 0.5573, "step": 8286 }, { "epoch": 1.2, "grad_norm": 6.771699905395508, "learning_rate": 1.3803424630847894e-06, "loss": 0.6525, "step": 8287 }, { "epoch": 1.2, "grad_norm": 6.9903411865234375, "learning_rate": 1.3801960713536127e-06, "loss": 0.5457, "step": 8288 }, { "epoch": 1.2, "grad_norm": 6.522207260131836, "learning_rate": 1.3800496700972408e-06, "loss": 0.6335, "step": 8289 }, { "epoch": 1.2, "grad_norm": 7.4154462814331055, "learning_rate": 1.3799032593193422e-06, "loss": 0.6902, "step": 8290 }, { "epoch": 1.2, "grad_norm": 7.200289249420166, "learning_rate": 1.3797568390235845e-06, "loss": 0.5776, "step": 8291 }, { "epoch": 1.2, "grad_norm": 6.239845275878906, "learning_rate": 1.3796104092136362e-06, "loss": 0.5771, "step": 8292 }, { "epoch": 1.2, "grad_norm": 6.640701770782471, "learning_rate": 1.3794639698931663e-06, "loss": 0.645, "step": 8293 }, { "epoch": 1.2, "grad_norm": 7.030882835388184, "learning_rate": 1.3793175210658428e-06, "loss": 0.5687, "step": 8294 }, { "epoch": 1.2, "grad_norm": 6.078352451324463, "learning_rate": 1.3791710627353353e-06, "loss": 0.4815, "step": 8295 }, { "epoch": 1.2, "grad_norm": 6.3162970542907715, "learning_rate": 1.379024594905313e-06, "loss": 0.5453, "step": 8296 }, { "epoch": 1.2, "grad_norm": 6.440436363220215, "learning_rate": 1.3788781175794453e-06, "loss": 0.5715, "step": 8297 }, { "epoch": 1.2, "grad_norm": 6.577070713043213, "learning_rate": 1.378731630761402e-06, "loss": 0.5472, "step": 8298 }, { "epoch": 1.2, "grad_norm": 6.420097827911377, "learning_rate": 1.3785851344548528e-06, "loss": 0.5158, "step": 8299 }, { "epoch": 1.2, "grad_norm": 5.575781345367432, "learning_rate": 1.3784386286634684e-06, "loss": 0.4864, "step": 8300 }, { "epoch": 1.2, "grad_norm": 6.83379602432251, "learning_rate": 1.378292113390919e-06, "loss": 0.5471, "step": 8301 }, { "epoch": 1.2, "grad_norm": 6.374109745025635, "learning_rate": 1.378145588640875e-06, "loss": 0.5255, "step": 8302 }, { "epoch": 1.2, "grad_norm": 6.446233749389648, "learning_rate": 1.3779990544170084e-06, "loss": 0.6176, "step": 8303 }, { "epoch": 1.2, "grad_norm": 6.323050022125244, "learning_rate": 1.3778525107229895e-06, "loss": 0.5248, "step": 8304 }, { "epoch": 1.21, "grad_norm": 6.66437292098999, "learning_rate": 1.3777059575624895e-06, "loss": 0.5386, "step": 8305 }, { "epoch": 1.21, "grad_norm": 6.903566360473633, "learning_rate": 1.3775593949391806e-06, "loss": 0.5694, "step": 8306 }, { "epoch": 1.21, "grad_norm": 6.282872676849365, "learning_rate": 1.3774128228567347e-06, "loss": 0.5773, "step": 8307 }, { "epoch": 1.21, "grad_norm": 5.839293479919434, "learning_rate": 1.3772662413188236e-06, "loss": 0.4969, "step": 8308 }, { "epoch": 1.21, "grad_norm": 7.072429180145264, "learning_rate": 1.3771196503291198e-06, "loss": 0.5333, "step": 8309 }, { "epoch": 1.21, "grad_norm": 6.1153244972229, "learning_rate": 1.3769730498912962e-06, "loss": 0.572, "step": 8310 }, { "epoch": 1.21, "grad_norm": 6.23015022277832, "learning_rate": 1.376826440009025e-06, "loss": 0.4781, "step": 8311 }, { "epoch": 1.21, "grad_norm": 6.286474227905273, "learning_rate": 1.3766798206859797e-06, "loss": 0.5421, "step": 8312 }, { "epoch": 1.21, "grad_norm": 6.552224636077881, "learning_rate": 1.3765331919258336e-06, "loss": 0.6251, "step": 8313 }, { "epoch": 1.21, "grad_norm": 6.421546936035156, "learning_rate": 1.37638655373226e-06, "loss": 0.5317, "step": 8314 }, { "epoch": 1.21, "grad_norm": 6.436639785766602, "learning_rate": 1.376239906108933e-06, "loss": 0.575, "step": 8315 }, { "epoch": 1.21, "grad_norm": 6.304556369781494, "learning_rate": 1.3760932490595267e-06, "loss": 0.4739, "step": 8316 }, { "epoch": 1.21, "grad_norm": 5.984697341918945, "learning_rate": 1.375946582587715e-06, "loss": 0.5049, "step": 8317 }, { "epoch": 1.21, "grad_norm": 5.941859722137451, "learning_rate": 1.3757999066971724e-06, "loss": 0.5442, "step": 8318 }, { "epoch": 1.21, "grad_norm": 6.33406400680542, "learning_rate": 1.3756532213915736e-06, "loss": 0.6126, "step": 8319 }, { "epoch": 1.21, "grad_norm": 5.724191665649414, "learning_rate": 1.3755065266745941e-06, "loss": 0.4908, "step": 8320 }, { "epoch": 1.21, "grad_norm": 7.1405720710754395, "learning_rate": 1.3753598225499086e-06, "loss": 0.6382, "step": 8321 }, { "epoch": 1.21, "grad_norm": 5.5074076652526855, "learning_rate": 1.3752131090211925e-06, "loss": 0.5371, "step": 8322 }, { "epoch": 1.21, "grad_norm": 6.002310276031494, "learning_rate": 1.3750663860921216e-06, "loss": 0.5008, "step": 8323 }, { "epoch": 1.21, "grad_norm": 7.6090288162231445, "learning_rate": 1.3749196537663721e-06, "loss": 0.55, "step": 8324 }, { "epoch": 1.21, "grad_norm": 7.86501932144165, "learning_rate": 1.3747729120476197e-06, "loss": 0.5627, "step": 8325 }, { "epoch": 1.21, "grad_norm": 6.416964530944824, "learning_rate": 1.3746261609395408e-06, "loss": 0.5012, "step": 8326 }, { "epoch": 1.21, "grad_norm": 6.073588848114014, "learning_rate": 1.3744794004458122e-06, "loss": 0.5972, "step": 8327 }, { "epoch": 1.21, "grad_norm": 7.202480316162109, "learning_rate": 1.3743326305701107e-06, "loss": 0.5743, "step": 8328 }, { "epoch": 1.21, "grad_norm": 6.59780216217041, "learning_rate": 1.3741858513161133e-06, "loss": 0.5626, "step": 8329 }, { "epoch": 1.21, "grad_norm": 6.389124870300293, "learning_rate": 1.3740390626874976e-06, "loss": 0.5259, "step": 8330 }, { "epoch": 1.21, "grad_norm": 6.961950778961182, "learning_rate": 1.3738922646879409e-06, "loss": 0.5433, "step": 8331 }, { "epoch": 1.21, "grad_norm": 7.384265422821045, "learning_rate": 1.373745457321121e-06, "loss": 0.5082, "step": 8332 }, { "epoch": 1.21, "grad_norm": 7.376270294189453, "learning_rate": 1.3735986405907157e-06, "loss": 0.6115, "step": 8333 }, { "epoch": 1.21, "grad_norm": 6.379144191741943, "learning_rate": 1.3734518145004037e-06, "loss": 0.4719, "step": 8334 }, { "epoch": 1.21, "grad_norm": 6.649449348449707, "learning_rate": 1.373304979053863e-06, "loss": 0.5495, "step": 8335 }, { "epoch": 1.21, "grad_norm": 5.989920139312744, "learning_rate": 1.373158134254773e-06, "loss": 0.6197, "step": 8336 }, { "epoch": 1.21, "grad_norm": 6.519174098968506, "learning_rate": 1.3730112801068122e-06, "loss": 0.5311, "step": 8337 }, { "epoch": 1.21, "grad_norm": 6.914371013641357, "learning_rate": 1.3728644166136596e-06, "loss": 0.5114, "step": 8338 }, { "epoch": 1.21, "grad_norm": 6.077116966247559, "learning_rate": 1.3727175437789948e-06, "loss": 0.5505, "step": 8339 }, { "epoch": 1.21, "grad_norm": 5.931432723999023, "learning_rate": 1.372570661606498e-06, "loss": 0.5427, "step": 8340 }, { "epoch": 1.21, "grad_norm": 6.223948001861572, "learning_rate": 1.3724237700998483e-06, "loss": 0.5756, "step": 8341 }, { "epoch": 1.21, "grad_norm": 6.7950544357299805, "learning_rate": 1.3722768692627261e-06, "loss": 0.5159, "step": 8342 }, { "epoch": 1.21, "grad_norm": 6.746026039123535, "learning_rate": 1.3721299590988122e-06, "loss": 0.5363, "step": 8343 }, { "epoch": 1.21, "grad_norm": 6.874584674835205, "learning_rate": 1.3719830396117869e-06, "loss": 0.6084, "step": 8344 }, { "epoch": 1.21, "grad_norm": 5.914638996124268, "learning_rate": 1.3718361108053306e-06, "loss": 0.5251, "step": 8345 }, { "epoch": 1.21, "grad_norm": 6.636406421661377, "learning_rate": 1.3716891726831247e-06, "loss": 0.5456, "step": 8346 }, { "epoch": 1.21, "grad_norm": 7.544273853302002, "learning_rate": 1.3715422252488507e-06, "loss": 0.6281, "step": 8347 }, { "epoch": 1.21, "grad_norm": 5.841885089874268, "learning_rate": 1.3713952685061897e-06, "loss": 0.529, "step": 8348 }, { "epoch": 1.21, "grad_norm": 6.8014068603515625, "learning_rate": 1.371248302458824e-06, "loss": 0.5599, "step": 8349 }, { "epoch": 1.21, "grad_norm": 6.291095733642578, "learning_rate": 1.371101327110435e-06, "loss": 0.5116, "step": 8350 }, { "epoch": 1.21, "grad_norm": 6.790683269500732, "learning_rate": 1.3709543424647054e-06, "loss": 0.5349, "step": 8351 }, { "epoch": 1.21, "grad_norm": 6.358388900756836, "learning_rate": 1.3708073485253176e-06, "loss": 0.5736, "step": 8352 }, { "epoch": 1.21, "grad_norm": 6.661398410797119, "learning_rate": 1.370660345295954e-06, "loss": 0.5212, "step": 8353 }, { "epoch": 1.21, "grad_norm": 6.622435092926025, "learning_rate": 1.3705133327802979e-06, "loss": 0.5415, "step": 8354 }, { "epoch": 1.21, "grad_norm": 6.504112720489502, "learning_rate": 1.370366310982032e-06, "loss": 0.5555, "step": 8355 }, { "epoch": 1.21, "grad_norm": 7.204447269439697, "learning_rate": 1.37021927990484e-06, "loss": 0.6112, "step": 8356 }, { "epoch": 1.21, "grad_norm": 5.422383785247803, "learning_rate": 1.3700722395524057e-06, "loss": 0.4758, "step": 8357 }, { "epoch": 1.21, "grad_norm": 6.426097393035889, "learning_rate": 1.3699251899284125e-06, "loss": 0.5827, "step": 8358 }, { "epoch": 1.21, "grad_norm": 6.240495681762695, "learning_rate": 1.369778131036545e-06, "loss": 0.5828, "step": 8359 }, { "epoch": 1.21, "grad_norm": 6.8474202156066895, "learning_rate": 1.369631062880487e-06, "loss": 0.5936, "step": 8360 }, { "epoch": 1.21, "grad_norm": 5.839696407318115, "learning_rate": 1.3694839854639236e-06, "loss": 0.5612, "step": 8361 }, { "epoch": 1.21, "grad_norm": 6.039695739746094, "learning_rate": 1.369336898790539e-06, "loss": 0.5401, "step": 8362 }, { "epoch": 1.21, "grad_norm": 6.002223491668701, "learning_rate": 1.3691898028640183e-06, "loss": 0.5124, "step": 8363 }, { "epoch": 1.21, "grad_norm": 5.750858306884766, "learning_rate": 1.3690426976880471e-06, "loss": 0.5735, "step": 8364 }, { "epoch": 1.21, "grad_norm": 5.993997097015381, "learning_rate": 1.368895583266311e-06, "loss": 0.5745, "step": 8365 }, { "epoch": 1.21, "grad_norm": 7.417006492614746, "learning_rate": 1.3687484596024956e-06, "loss": 0.5106, "step": 8366 }, { "epoch": 1.21, "grad_norm": 6.247371196746826, "learning_rate": 1.3686013267002862e-06, "loss": 0.5096, "step": 8367 }, { "epoch": 1.21, "grad_norm": 7.036191940307617, "learning_rate": 1.36845418456337e-06, "loss": 0.5885, "step": 8368 }, { "epoch": 1.21, "grad_norm": 6.168050289154053, "learning_rate": 1.3683070331954324e-06, "loss": 0.4818, "step": 8369 }, { "epoch": 1.21, "grad_norm": 6.181603908538818, "learning_rate": 1.368159872600161e-06, "loss": 0.5051, "step": 8370 }, { "epoch": 1.21, "grad_norm": 6.4685773849487305, "learning_rate": 1.3680127027812416e-06, "loss": 0.5361, "step": 8371 }, { "epoch": 1.21, "grad_norm": 6.616424560546875, "learning_rate": 1.3678655237423624e-06, "loss": 0.5248, "step": 8372 }, { "epoch": 1.21, "grad_norm": 6.425858974456787, "learning_rate": 1.36771833548721e-06, "loss": 0.5594, "step": 8373 }, { "epoch": 1.22, "grad_norm": 6.267376899719238, "learning_rate": 1.3675711380194724e-06, "loss": 0.4526, "step": 8374 }, { "epoch": 1.22, "grad_norm": 6.625524520874023, "learning_rate": 1.367423931342837e-06, "loss": 0.5642, "step": 8375 }, { "epoch": 1.22, "grad_norm": 6.289457321166992, "learning_rate": 1.3672767154609917e-06, "loss": 0.5762, "step": 8376 }, { "epoch": 1.22, "grad_norm": 6.421584129333496, "learning_rate": 1.3671294903776253e-06, "loss": 0.524, "step": 8377 }, { "epoch": 1.22, "grad_norm": 5.645164489746094, "learning_rate": 1.3669822560964262e-06, "loss": 0.5129, "step": 8378 }, { "epoch": 1.22, "grad_norm": 6.180556774139404, "learning_rate": 1.366835012621083e-06, "loss": 0.5378, "step": 8379 }, { "epoch": 1.22, "grad_norm": 6.101103782653809, "learning_rate": 1.3666877599552843e-06, "loss": 0.5885, "step": 8380 }, { "epoch": 1.22, "grad_norm": 6.058620929718018, "learning_rate": 1.3665404981027198e-06, "loss": 0.6294, "step": 8381 }, { "epoch": 1.22, "grad_norm": 6.092445373535156, "learning_rate": 1.3663932270670785e-06, "loss": 0.5207, "step": 8382 }, { "epoch": 1.22, "grad_norm": 6.108989715576172, "learning_rate": 1.3662459468520505e-06, "loss": 0.562, "step": 8383 }, { "epoch": 1.22, "grad_norm": 6.144993782043457, "learning_rate": 1.366098657461325e-06, "loss": 0.5238, "step": 8384 }, { "epoch": 1.22, "grad_norm": 6.715200901031494, "learning_rate": 1.365951358898593e-06, "loss": 0.5143, "step": 8385 }, { "epoch": 1.22, "grad_norm": 6.065479278564453, "learning_rate": 1.3658040511675436e-06, "loss": 0.5117, "step": 8386 }, { "epoch": 1.22, "grad_norm": 5.804692268371582, "learning_rate": 1.365656734271869e-06, "loss": 0.5463, "step": 8387 }, { "epoch": 1.22, "grad_norm": 6.370569229125977, "learning_rate": 1.3655094082152583e-06, "loss": 0.5399, "step": 8388 }, { "epoch": 1.22, "grad_norm": 7.3080153465271, "learning_rate": 1.3653620730014034e-06, "loss": 0.5089, "step": 8389 }, { "epoch": 1.22, "grad_norm": 6.224343299865723, "learning_rate": 1.3652147286339954e-06, "loss": 0.5121, "step": 8390 }, { "epoch": 1.22, "grad_norm": 5.832817554473877, "learning_rate": 1.3650673751167258e-06, "loss": 0.5546, "step": 8391 }, { "epoch": 1.22, "grad_norm": 6.942116737365723, "learning_rate": 1.3649200124532865e-06, "loss": 0.5662, "step": 8392 }, { "epoch": 1.22, "grad_norm": 7.2187418937683105, "learning_rate": 1.3647726406473692e-06, "loss": 0.5671, "step": 8393 }, { "epoch": 1.22, "grad_norm": 6.411679267883301, "learning_rate": 1.3646252597026662e-06, "loss": 0.5403, "step": 8394 }, { "epoch": 1.22, "grad_norm": 6.980854511260986, "learning_rate": 1.3644778696228694e-06, "loss": 0.5803, "step": 8395 }, { "epoch": 1.22, "grad_norm": 7.33740234375, "learning_rate": 1.364330470411672e-06, "loss": 0.5669, "step": 8396 }, { "epoch": 1.22, "grad_norm": 7.134344577789307, "learning_rate": 1.3641830620727667e-06, "loss": 0.5543, "step": 8397 }, { "epoch": 1.22, "grad_norm": 5.756258487701416, "learning_rate": 1.3640356446098463e-06, "loss": 0.4579, "step": 8398 }, { "epoch": 1.22, "grad_norm": 5.607264518737793, "learning_rate": 1.3638882180266043e-06, "loss": 0.5799, "step": 8399 }, { "epoch": 1.22, "grad_norm": 6.6882147789001465, "learning_rate": 1.3637407823267346e-06, "loss": 0.5771, "step": 8400 }, { "epoch": 1.22, "grad_norm": 6.622845649719238, "learning_rate": 1.3635933375139303e-06, "loss": 0.6304, "step": 8401 }, { "epoch": 1.22, "grad_norm": 6.377245903015137, "learning_rate": 1.363445883591886e-06, "loss": 0.5506, "step": 8402 }, { "epoch": 1.22, "grad_norm": 6.650356292724609, "learning_rate": 1.3632984205642952e-06, "loss": 0.5451, "step": 8403 }, { "epoch": 1.22, "grad_norm": 6.308398246765137, "learning_rate": 1.363150948434853e-06, "loss": 0.6154, "step": 8404 }, { "epoch": 1.22, "grad_norm": 6.7729973793029785, "learning_rate": 1.3630034672072538e-06, "loss": 0.5717, "step": 8405 }, { "epoch": 1.22, "grad_norm": 5.783286094665527, "learning_rate": 1.3628559768851925e-06, "loss": 0.5329, "step": 8406 }, { "epoch": 1.22, "grad_norm": 6.345160484313965, "learning_rate": 1.3627084774723647e-06, "loss": 0.5893, "step": 8407 }, { "epoch": 1.22, "grad_norm": 5.9187235832214355, "learning_rate": 1.3625609689724651e-06, "loss": 0.4938, "step": 8408 }, { "epoch": 1.22, "grad_norm": 6.59353494644165, "learning_rate": 1.3624134513891893e-06, "loss": 0.5818, "step": 8409 }, { "epoch": 1.22, "grad_norm": 6.295797824859619, "learning_rate": 1.3622659247262333e-06, "loss": 0.5299, "step": 8410 }, { "epoch": 1.22, "grad_norm": 6.396012783050537, "learning_rate": 1.3621183889872934e-06, "loss": 0.492, "step": 8411 }, { "epoch": 1.22, "grad_norm": 6.798986434936523, "learning_rate": 1.3619708441760655e-06, "loss": 0.4786, "step": 8412 }, { "epoch": 1.22, "grad_norm": 5.991454124450684, "learning_rate": 1.3618232902962464e-06, "loss": 0.4884, "step": 8413 }, { "epoch": 1.22, "grad_norm": 6.755389213562012, "learning_rate": 1.3616757273515326e-06, "loss": 0.5184, "step": 8414 }, { "epoch": 1.22, "grad_norm": 7.11316442489624, "learning_rate": 1.361528155345621e-06, "loss": 0.6545, "step": 8415 }, { "epoch": 1.22, "grad_norm": 6.561296463012695, "learning_rate": 1.3613805742822092e-06, "loss": 0.5975, "step": 8416 }, { "epoch": 1.22, "grad_norm": 6.383213043212891, "learning_rate": 1.361232984164994e-06, "loss": 0.5284, "step": 8417 }, { "epoch": 1.22, "grad_norm": 6.477016925811768, "learning_rate": 1.3610853849976734e-06, "loss": 0.5063, "step": 8418 }, { "epoch": 1.22, "grad_norm": 6.529384136199951, "learning_rate": 1.3609377767839449e-06, "loss": 0.5476, "step": 8419 }, { "epoch": 1.22, "grad_norm": 6.260448455810547, "learning_rate": 1.3607901595275075e-06, "loss": 0.5289, "step": 8420 }, { "epoch": 1.22, "grad_norm": 6.765640735626221, "learning_rate": 1.3606425332320584e-06, "loss": 0.5621, "step": 8421 }, { "epoch": 1.22, "grad_norm": 6.354732513427734, "learning_rate": 1.360494897901297e-06, "loss": 0.4595, "step": 8422 }, { "epoch": 1.22, "grad_norm": 6.891499996185303, "learning_rate": 1.360347253538921e-06, "loss": 0.5849, "step": 8423 }, { "epoch": 1.22, "grad_norm": 6.860823154449463, "learning_rate": 1.3601996001486307e-06, "loss": 0.5927, "step": 8424 }, { "epoch": 1.22, "grad_norm": 6.242162227630615, "learning_rate": 1.3600519377341245e-06, "loss": 0.6117, "step": 8425 }, { "epoch": 1.22, "grad_norm": 6.4702229499816895, "learning_rate": 1.3599042662991019e-06, "loss": 0.5379, "step": 8426 }, { "epoch": 1.22, "grad_norm": 6.780038833618164, "learning_rate": 1.3597565858472625e-06, "loss": 0.5517, "step": 8427 }, { "epoch": 1.22, "grad_norm": 7.187775611877441, "learning_rate": 1.3596088963823072e-06, "loss": 0.6317, "step": 8428 }, { "epoch": 1.22, "grad_norm": 5.531118392944336, "learning_rate": 1.3594611979079344e-06, "loss": 0.4925, "step": 8429 }, { "epoch": 1.22, "grad_norm": 5.855926990509033, "learning_rate": 1.3593134904278457e-06, "loss": 0.4754, "step": 8430 }, { "epoch": 1.22, "grad_norm": 6.491123676300049, "learning_rate": 1.3591657739457416e-06, "loss": 0.5218, "step": 8431 }, { "epoch": 1.22, "grad_norm": 6.284700393676758, "learning_rate": 1.3590180484653223e-06, "loss": 0.5422, "step": 8432 }, { "epoch": 1.22, "grad_norm": 6.411340713500977, "learning_rate": 1.358870313990289e-06, "loss": 0.5759, "step": 8433 }, { "epoch": 1.22, "grad_norm": 6.458240032196045, "learning_rate": 1.3587225705243434e-06, "loss": 0.5384, "step": 8434 }, { "epoch": 1.22, "grad_norm": 6.299957752227783, "learning_rate": 1.3585748180711867e-06, "loss": 0.5766, "step": 8435 }, { "epoch": 1.22, "grad_norm": 6.9629340171813965, "learning_rate": 1.3584270566345203e-06, "loss": 0.6012, "step": 8436 }, { "epoch": 1.22, "grad_norm": 6.7724480628967285, "learning_rate": 1.3582792862180466e-06, "loss": 0.5826, "step": 8437 }, { "epoch": 1.22, "grad_norm": 6.980266094207764, "learning_rate": 1.3581315068254674e-06, "loss": 0.6027, "step": 8438 }, { "epoch": 1.22, "grad_norm": 6.925947666168213, "learning_rate": 1.357983718460485e-06, "loss": 0.5824, "step": 8439 }, { "epoch": 1.22, "grad_norm": 6.560008525848389, "learning_rate": 1.3578359211268024e-06, "loss": 0.5411, "step": 8440 }, { "epoch": 1.22, "grad_norm": 6.990047931671143, "learning_rate": 1.3576881148281221e-06, "loss": 0.5358, "step": 8441 }, { "epoch": 1.22, "grad_norm": 5.57947301864624, "learning_rate": 1.3575402995681472e-06, "loss": 0.4995, "step": 8442 }, { "epoch": 1.23, "grad_norm": 6.893408298492432, "learning_rate": 1.3573924753505815e-06, "loss": 0.5982, "step": 8443 }, { "epoch": 1.23, "grad_norm": 7.126684665679932, "learning_rate": 1.3572446421791276e-06, "loss": 0.5698, "step": 8444 }, { "epoch": 1.23, "grad_norm": 7.151846885681152, "learning_rate": 1.3570968000574897e-06, "loss": 0.5447, "step": 8445 }, { "epoch": 1.23, "grad_norm": 6.603877544403076, "learning_rate": 1.356948948989372e-06, "loss": 0.6062, "step": 8446 }, { "epoch": 1.23, "grad_norm": 5.981165409088135, "learning_rate": 1.356801088978478e-06, "loss": 0.5043, "step": 8447 }, { "epoch": 1.23, "grad_norm": 6.49359655380249, "learning_rate": 1.3566532200285128e-06, "loss": 0.5238, "step": 8448 }, { "epoch": 1.23, "grad_norm": 6.056221008300781, "learning_rate": 1.3565053421431805e-06, "loss": 0.5503, "step": 8449 }, { "epoch": 1.23, "grad_norm": 6.2847137451171875, "learning_rate": 1.356357455326186e-06, "loss": 0.6219, "step": 8450 }, { "epoch": 1.23, "grad_norm": 5.783764839172363, "learning_rate": 1.3562095595812345e-06, "loss": 0.5013, "step": 8451 }, { "epoch": 1.23, "grad_norm": 5.725198745727539, "learning_rate": 1.3560616549120313e-06, "loss": 0.5431, "step": 8452 }, { "epoch": 1.23, "grad_norm": 6.395634651184082, "learning_rate": 1.3559137413222818e-06, "loss": 0.559, "step": 8453 }, { "epoch": 1.23, "grad_norm": 6.036407947540283, "learning_rate": 1.355765818815692e-06, "loss": 0.5174, "step": 8454 }, { "epoch": 1.23, "grad_norm": 6.522645473480225, "learning_rate": 1.3556178873959675e-06, "loss": 0.5169, "step": 8455 }, { "epoch": 1.23, "grad_norm": 6.255561828613281, "learning_rate": 1.355469947066815e-06, "loss": 0.5266, "step": 8456 }, { "epoch": 1.23, "grad_norm": 7.051966667175293, "learning_rate": 1.3553219978319403e-06, "loss": 0.5308, "step": 8457 }, { "epoch": 1.23, "grad_norm": 5.830984115600586, "learning_rate": 1.3551740396950502e-06, "loss": 0.5624, "step": 8458 }, { "epoch": 1.23, "grad_norm": 6.210357666015625, "learning_rate": 1.3550260726598519e-06, "loss": 0.576, "step": 8459 }, { "epoch": 1.23, "grad_norm": 5.239691257476807, "learning_rate": 1.3548780967300519e-06, "loss": 0.4955, "step": 8460 }, { "epoch": 1.23, "grad_norm": 5.936300277709961, "learning_rate": 1.3547301119093578e-06, "loss": 0.5492, "step": 8461 }, { "epoch": 1.23, "grad_norm": 6.083498954772949, "learning_rate": 1.3545821182014772e-06, "loss": 0.5198, "step": 8462 }, { "epoch": 1.23, "grad_norm": 6.599551200866699, "learning_rate": 1.3544341156101181e-06, "loss": 0.5547, "step": 8463 }, { "epoch": 1.23, "grad_norm": 6.4477858543396, "learning_rate": 1.3542861041389876e-06, "loss": 0.5234, "step": 8464 }, { "epoch": 1.23, "grad_norm": 6.248443603515625, "learning_rate": 1.3541380837917948e-06, "loss": 0.5354, "step": 8465 }, { "epoch": 1.23, "grad_norm": 6.832348346710205, "learning_rate": 1.3539900545722476e-06, "loss": 0.5426, "step": 8466 }, { "epoch": 1.23, "grad_norm": 5.7680277824401855, "learning_rate": 1.3538420164840546e-06, "loss": 0.5749, "step": 8467 }, { "epoch": 1.23, "grad_norm": 6.0982184410095215, "learning_rate": 1.3536939695309248e-06, "loss": 0.51, "step": 8468 }, { "epoch": 1.23, "grad_norm": 6.010354518890381, "learning_rate": 1.3535459137165677e-06, "loss": 0.5288, "step": 8469 }, { "epoch": 1.23, "grad_norm": 7.990304470062256, "learning_rate": 1.3533978490446917e-06, "loss": 0.5204, "step": 8470 }, { "epoch": 1.23, "grad_norm": 6.348916530609131, "learning_rate": 1.353249775519007e-06, "loss": 0.503, "step": 8471 }, { "epoch": 1.23, "grad_norm": 5.93491792678833, "learning_rate": 1.3531016931432232e-06, "loss": 0.5872, "step": 8472 }, { "epoch": 1.23, "grad_norm": 5.847142696380615, "learning_rate": 1.3529536019210502e-06, "loss": 0.4439, "step": 8473 }, { "epoch": 1.23, "grad_norm": 7.3719096183776855, "learning_rate": 1.3528055018561982e-06, "loss": 0.5754, "step": 8474 }, { "epoch": 1.23, "grad_norm": 6.29213285446167, "learning_rate": 1.3526573929523774e-06, "loss": 0.5277, "step": 8475 }, { "epoch": 1.23, "grad_norm": 6.610665798187256, "learning_rate": 1.352509275213299e-06, "loss": 0.5498, "step": 8476 }, { "epoch": 1.23, "grad_norm": 6.181795120239258, "learning_rate": 1.3523611486426732e-06, "loss": 0.5853, "step": 8477 }, { "epoch": 1.23, "grad_norm": 6.63381290435791, "learning_rate": 1.3522130132442113e-06, "loss": 0.5508, "step": 8478 }, { "epoch": 1.23, "grad_norm": 6.914974689483643, "learning_rate": 1.3520648690216248e-06, "loss": 0.5803, "step": 8479 }, { "epoch": 1.23, "grad_norm": 7.010370254516602, "learning_rate": 1.351916715978625e-06, "loss": 0.6206, "step": 8480 }, { "epoch": 1.23, "grad_norm": 5.740673065185547, "learning_rate": 1.3517685541189236e-06, "loss": 0.5486, "step": 8481 }, { "epoch": 1.23, "grad_norm": 6.0120086669921875, "learning_rate": 1.3516203834462327e-06, "loss": 0.4919, "step": 8482 }, { "epoch": 1.23, "grad_norm": 5.969104766845703, "learning_rate": 1.3514722039642646e-06, "loss": 0.4786, "step": 8483 }, { "epoch": 1.23, "grad_norm": 5.828500747680664, "learning_rate": 1.3513240156767313e-06, "loss": 0.5079, "step": 8484 }, { "epoch": 1.23, "grad_norm": 6.738088130950928, "learning_rate": 1.3511758185873458e-06, "loss": 0.5502, "step": 8485 }, { "epoch": 1.23, "grad_norm": 6.299449920654297, "learning_rate": 1.3510276126998206e-06, "loss": 0.5359, "step": 8486 }, { "epoch": 1.23, "grad_norm": 6.560213565826416, "learning_rate": 1.350879398017869e-06, "loss": 0.4891, "step": 8487 }, { "epoch": 1.23, "grad_norm": 6.102088451385498, "learning_rate": 1.350731174545204e-06, "loss": 0.5817, "step": 8488 }, { "epoch": 1.23, "grad_norm": 6.749282360076904, "learning_rate": 1.3505829422855395e-06, "loss": 0.5928, "step": 8489 }, { "epoch": 1.23, "grad_norm": 6.4501752853393555, "learning_rate": 1.350434701242589e-06, "loss": 0.5432, "step": 8490 }, { "epoch": 1.23, "grad_norm": 6.511457920074463, "learning_rate": 1.3502864514200666e-06, "loss": 0.529, "step": 8491 }, { "epoch": 1.23, "grad_norm": 6.9881157875061035, "learning_rate": 1.3501381928216862e-06, "loss": 0.5759, "step": 8492 }, { "epoch": 1.23, "grad_norm": 5.964664936065674, "learning_rate": 1.349989925451162e-06, "loss": 0.5316, "step": 8493 }, { "epoch": 1.23, "grad_norm": 6.6866865158081055, "learning_rate": 1.3498416493122091e-06, "loss": 0.5956, "step": 8494 }, { "epoch": 1.23, "grad_norm": 5.837125301361084, "learning_rate": 1.3496933644085422e-06, "loss": 0.5145, "step": 8495 }, { "epoch": 1.23, "grad_norm": 6.181972503662109, "learning_rate": 1.3495450707438762e-06, "loss": 0.5447, "step": 8496 }, { "epoch": 1.23, "grad_norm": 6.402971267700195, "learning_rate": 1.3493967683219266e-06, "loss": 0.4875, "step": 8497 }, { "epoch": 1.23, "grad_norm": 6.050235748291016, "learning_rate": 1.3492484571464084e-06, "loss": 0.5565, "step": 8498 }, { "epoch": 1.23, "grad_norm": 5.747749328613281, "learning_rate": 1.3491001372210376e-06, "loss": 0.5164, "step": 8499 }, { "epoch": 1.23, "grad_norm": 5.783233642578125, "learning_rate": 1.3489518085495302e-06, "loss": 0.5097, "step": 8500 }, { "epoch": 1.23, "grad_norm": 5.374823093414307, "learning_rate": 1.3488034711356022e-06, "loss": 0.5388, "step": 8501 }, { "epoch": 1.23, "grad_norm": 7.135828495025635, "learning_rate": 1.3486551249829698e-06, "loss": 0.5543, "step": 8502 }, { "epoch": 1.23, "grad_norm": 6.889443874359131, "learning_rate": 1.3485067700953502e-06, "loss": 0.5392, "step": 8503 }, { "epoch": 1.23, "grad_norm": 6.851449489593506, "learning_rate": 1.3483584064764592e-06, "loss": 0.4895, "step": 8504 }, { "epoch": 1.23, "grad_norm": 5.699194431304932, "learning_rate": 1.3482100341300147e-06, "loss": 0.5431, "step": 8505 }, { "epoch": 1.23, "grad_norm": 6.058678150177002, "learning_rate": 1.3480616530597336e-06, "loss": 0.5125, "step": 8506 }, { "epoch": 1.23, "grad_norm": 6.652178764343262, "learning_rate": 1.3479132632693332e-06, "loss": 0.5484, "step": 8507 }, { "epoch": 1.23, "grad_norm": 6.423625469207764, "learning_rate": 1.347764864762531e-06, "loss": 0.5619, "step": 8508 }, { "epoch": 1.23, "grad_norm": 6.996756076812744, "learning_rate": 1.3476164575430456e-06, "loss": 0.5413, "step": 8509 }, { "epoch": 1.23, "grad_norm": 6.683963775634766, "learning_rate": 1.3474680416145944e-06, "loss": 0.5484, "step": 8510 }, { "epoch": 1.23, "grad_norm": 6.320178985595703, "learning_rate": 1.3473196169808963e-06, "loss": 0.5317, "step": 8511 }, { "epoch": 1.24, "grad_norm": 5.445772647857666, "learning_rate": 1.3471711836456695e-06, "loss": 0.5326, "step": 8512 }, { "epoch": 1.24, "grad_norm": 7.938232898712158, "learning_rate": 1.3470227416126325e-06, "loss": 0.5599, "step": 8513 }, { "epoch": 1.24, "grad_norm": 6.945195198059082, "learning_rate": 1.3468742908855046e-06, "loss": 0.5663, "step": 8514 }, { "epoch": 1.24, "grad_norm": 6.130713939666748, "learning_rate": 1.3467258314680051e-06, "loss": 0.6062, "step": 8515 }, { "epoch": 1.24, "grad_norm": 7.2895402908325195, "learning_rate": 1.346577363363853e-06, "loss": 0.6202, "step": 8516 }, { "epoch": 1.24, "grad_norm": 6.0623459815979, "learning_rate": 1.3464288865767686e-06, "loss": 0.585, "step": 8517 }, { "epoch": 1.24, "grad_norm": 6.438503742218018, "learning_rate": 1.3462804011104712e-06, "loss": 0.5534, "step": 8518 }, { "epoch": 1.24, "grad_norm": 6.827548027038574, "learning_rate": 1.3461319069686809e-06, "loss": 0.6167, "step": 8519 }, { "epoch": 1.24, "grad_norm": 5.9611053466796875, "learning_rate": 1.345983404155118e-06, "loss": 0.4927, "step": 8520 }, { "epoch": 1.24, "grad_norm": 6.295010089874268, "learning_rate": 1.3458348926735033e-06, "loss": 0.5578, "step": 8521 }, { "epoch": 1.24, "grad_norm": 6.0368571281433105, "learning_rate": 1.3456863725275569e-06, "loss": 0.5309, "step": 8522 }, { "epoch": 1.24, "grad_norm": 5.930056095123291, "learning_rate": 1.3455378437210006e-06, "loss": 0.4936, "step": 8523 }, { "epoch": 1.24, "grad_norm": 6.429527759552002, "learning_rate": 1.3453893062575548e-06, "loss": 0.5736, "step": 8524 }, { "epoch": 1.24, "grad_norm": 5.455933570861816, "learning_rate": 1.3452407601409413e-06, "loss": 0.5413, "step": 8525 }, { "epoch": 1.24, "grad_norm": 6.399819850921631, "learning_rate": 1.3450922053748814e-06, "loss": 0.5468, "step": 8526 }, { "epoch": 1.24, "grad_norm": 7.008523464202881, "learning_rate": 1.344943641963097e-06, "loss": 0.5149, "step": 8527 }, { "epoch": 1.24, "grad_norm": 6.45647668838501, "learning_rate": 1.34479506990931e-06, "loss": 0.4969, "step": 8528 }, { "epoch": 1.24, "grad_norm": 5.786519527435303, "learning_rate": 1.344646489217243e-06, "loss": 0.5572, "step": 8529 }, { "epoch": 1.24, "grad_norm": 6.789677143096924, "learning_rate": 1.344497899890618e-06, "loss": 0.5586, "step": 8530 }, { "epoch": 1.24, "grad_norm": 6.2503862380981445, "learning_rate": 1.344349301933158e-06, "loss": 0.5708, "step": 8531 }, { "epoch": 1.24, "grad_norm": 6.637185573577881, "learning_rate": 1.3442006953485859e-06, "loss": 0.6039, "step": 8532 }, { "epoch": 1.24, "grad_norm": 6.208248615264893, "learning_rate": 1.3440520801406244e-06, "loss": 0.5422, "step": 8533 }, { "epoch": 1.24, "grad_norm": 6.642959117889404, "learning_rate": 1.343903456312997e-06, "loss": 0.5502, "step": 8534 }, { "epoch": 1.24, "grad_norm": 6.450814247131348, "learning_rate": 1.3437548238694273e-06, "loss": 0.5761, "step": 8535 }, { "epoch": 1.24, "grad_norm": 6.496753692626953, "learning_rate": 1.3436061828136392e-06, "loss": 0.5622, "step": 8536 }, { "epoch": 1.24, "grad_norm": 6.344904899597168, "learning_rate": 1.3434575331493564e-06, "loss": 0.5218, "step": 8537 }, { "epoch": 1.24, "grad_norm": 6.728682518005371, "learning_rate": 1.3433088748803033e-06, "loss": 0.5228, "step": 8538 }, { "epoch": 1.24, "grad_norm": 6.931544780731201, "learning_rate": 1.3431602080102042e-06, "loss": 0.5648, "step": 8539 }, { "epoch": 1.24, "grad_norm": 6.418793201446533, "learning_rate": 1.3430115325427834e-06, "loss": 0.5265, "step": 8540 }, { "epoch": 1.24, "grad_norm": 5.7973785400390625, "learning_rate": 1.342862848481766e-06, "loss": 0.5727, "step": 8541 }, { "epoch": 1.24, "grad_norm": 6.153471946716309, "learning_rate": 1.3427141558308771e-06, "loss": 0.5333, "step": 8542 }, { "epoch": 1.24, "grad_norm": 5.909811019897461, "learning_rate": 1.342565454593842e-06, "loss": 0.5147, "step": 8543 }, { "epoch": 1.24, "grad_norm": 6.728358268737793, "learning_rate": 1.342416744774386e-06, "loss": 0.566, "step": 8544 }, { "epoch": 1.24, "grad_norm": 5.475461483001709, "learning_rate": 1.3422680263762348e-06, "loss": 0.5468, "step": 8545 }, { "epoch": 1.24, "grad_norm": 6.378901958465576, "learning_rate": 1.342119299403114e-06, "loss": 0.5598, "step": 8546 }, { "epoch": 1.24, "grad_norm": 6.217058181762695, "learning_rate": 1.3419705638587505e-06, "loss": 0.4982, "step": 8547 }, { "epoch": 1.24, "grad_norm": 6.115771770477295, "learning_rate": 1.3418218197468699e-06, "loss": 0.5938, "step": 8548 }, { "epoch": 1.24, "grad_norm": 5.614343166351318, "learning_rate": 1.341673067071199e-06, "loss": 0.5391, "step": 8549 }, { "epoch": 1.24, "grad_norm": 6.823080062866211, "learning_rate": 1.3415243058354648e-06, "loss": 0.508, "step": 8550 }, { "epoch": 1.24, "grad_norm": 6.773425579071045, "learning_rate": 1.3413755360433939e-06, "loss": 0.4724, "step": 8551 }, { "epoch": 1.24, "grad_norm": 6.442663669586182, "learning_rate": 1.3412267576987138e-06, "loss": 0.5402, "step": 8552 }, { "epoch": 1.24, "grad_norm": 6.762567043304443, "learning_rate": 1.3410779708051517e-06, "loss": 0.5355, "step": 8553 }, { "epoch": 1.24, "grad_norm": 5.687742233276367, "learning_rate": 1.340929175366435e-06, "loss": 0.4882, "step": 8554 }, { "epoch": 1.24, "grad_norm": 7.041058540344238, "learning_rate": 1.3407803713862917e-06, "loss": 0.5505, "step": 8555 }, { "epoch": 1.24, "grad_norm": 7.179883003234863, "learning_rate": 1.34063155886845e-06, "loss": 0.5462, "step": 8556 }, { "epoch": 1.24, "grad_norm": 6.629645824432373, "learning_rate": 1.340482737816638e-06, "loss": 0.5243, "step": 8557 }, { "epoch": 1.24, "grad_norm": 6.164646148681641, "learning_rate": 1.3403339082345846e-06, "loss": 0.4805, "step": 8558 }, { "epoch": 1.24, "grad_norm": 6.331491947174072, "learning_rate": 1.3401850701260177e-06, "loss": 0.5019, "step": 8559 }, { "epoch": 1.24, "grad_norm": 6.416478633880615, "learning_rate": 1.3400362234946671e-06, "loss": 0.562, "step": 8560 }, { "epoch": 1.24, "grad_norm": 6.294068336486816, "learning_rate": 1.339887368344261e-06, "loss": 0.528, "step": 8561 }, { "epoch": 1.24, "grad_norm": 6.551115989685059, "learning_rate": 1.3397385046785293e-06, "loss": 0.5131, "step": 8562 }, { "epoch": 1.24, "grad_norm": 6.706343650817871, "learning_rate": 1.3395896325012012e-06, "loss": 0.6091, "step": 8563 }, { "epoch": 1.24, "grad_norm": 6.295380115509033, "learning_rate": 1.3394407518160067e-06, "loss": 0.6031, "step": 8564 }, { "epoch": 1.24, "grad_norm": 5.777491092681885, "learning_rate": 1.3392918626266758e-06, "loss": 0.5204, "step": 8565 }, { "epoch": 1.24, "grad_norm": 6.335196495056152, "learning_rate": 1.3391429649369385e-06, "loss": 0.4956, "step": 8566 }, { "epoch": 1.24, "grad_norm": 5.474491596221924, "learning_rate": 1.3389940587505255e-06, "loss": 0.5045, "step": 8567 }, { "epoch": 1.24, "grad_norm": 7.097557544708252, "learning_rate": 1.338845144071167e-06, "loss": 0.5735, "step": 8568 }, { "epoch": 1.24, "grad_norm": 6.963284015655518, "learning_rate": 1.338696220902594e-06, "loss": 0.5402, "step": 8569 }, { "epoch": 1.24, "grad_norm": 6.430004119873047, "learning_rate": 1.3385472892485371e-06, "loss": 0.5724, "step": 8570 }, { "epoch": 1.24, "grad_norm": 6.388340473175049, "learning_rate": 1.3383983491127285e-06, "loss": 0.5514, "step": 8571 }, { "epoch": 1.24, "grad_norm": 6.9443888664245605, "learning_rate": 1.3382494004988986e-06, "loss": 0.6266, "step": 8572 }, { "epoch": 1.24, "grad_norm": 7.145106315612793, "learning_rate": 1.33810044341078e-06, "loss": 0.5041, "step": 8573 }, { "epoch": 1.24, "grad_norm": 5.943277359008789, "learning_rate": 1.337951477852104e-06, "loss": 0.5981, "step": 8574 }, { "epoch": 1.24, "grad_norm": 5.852226257324219, "learning_rate": 1.3378025038266027e-06, "loss": 0.5281, "step": 8575 }, { "epoch": 1.24, "grad_norm": 6.878605842590332, "learning_rate": 1.3376535213380087e-06, "loss": 0.5066, "step": 8576 }, { "epoch": 1.24, "grad_norm": 6.839791774749756, "learning_rate": 1.3375045303900545e-06, "loss": 0.5451, "step": 8577 }, { "epoch": 1.24, "grad_norm": 6.346389293670654, "learning_rate": 1.3373555309864723e-06, "loss": 0.6259, "step": 8578 }, { "epoch": 1.24, "grad_norm": 6.747036933898926, "learning_rate": 1.3372065231309957e-06, "loss": 0.6575, "step": 8579 }, { "epoch": 1.24, "grad_norm": 6.150618553161621, "learning_rate": 1.3370575068273575e-06, "loss": 0.5209, "step": 8580 }, { "epoch": 1.25, "grad_norm": 6.149420261383057, "learning_rate": 1.3369084820792913e-06, "loss": 0.5982, "step": 8581 }, { "epoch": 1.25, "grad_norm": 6.253606796264648, "learning_rate": 1.3367594488905303e-06, "loss": 0.5496, "step": 8582 }, { "epoch": 1.25, "grad_norm": 7.113868236541748, "learning_rate": 1.3366104072648085e-06, "loss": 0.5681, "step": 8583 }, { "epoch": 1.25, "grad_norm": 6.1888556480407715, "learning_rate": 1.3364613572058599e-06, "loss": 0.5995, "step": 8584 }, { "epoch": 1.25, "grad_norm": 6.797128200531006, "learning_rate": 1.3363122987174186e-06, "loss": 0.571, "step": 8585 }, { "epoch": 1.25, "grad_norm": 6.963852882385254, "learning_rate": 1.3361632318032193e-06, "loss": 0.5519, "step": 8586 }, { "epoch": 1.25, "grad_norm": 7.402059078216553, "learning_rate": 1.3360141564669963e-06, "loss": 0.5594, "step": 8587 }, { "epoch": 1.25, "grad_norm": 6.227744102478027, "learning_rate": 1.3358650727124846e-06, "loss": 0.4914, "step": 8588 }, { "epoch": 1.25, "grad_norm": 6.258972644805908, "learning_rate": 1.3357159805434192e-06, "loss": 0.5228, "step": 8589 }, { "epoch": 1.25, "grad_norm": 6.264651298522949, "learning_rate": 1.3355668799635353e-06, "loss": 0.5902, "step": 8590 }, { "epoch": 1.25, "grad_norm": 6.446854114532471, "learning_rate": 1.3354177709765687e-06, "loss": 0.5075, "step": 8591 }, { "epoch": 1.25, "grad_norm": 6.7247490882873535, "learning_rate": 1.3352686535862546e-06, "loss": 0.5305, "step": 8592 }, { "epoch": 1.25, "grad_norm": 6.396578788757324, "learning_rate": 1.3351195277963295e-06, "loss": 0.5181, "step": 8593 }, { "epoch": 1.25, "grad_norm": 6.906451225280762, "learning_rate": 1.3349703936105288e-06, "loss": 0.5647, "step": 8594 }, { "epoch": 1.25, "grad_norm": 6.597583770751953, "learning_rate": 1.3348212510325894e-06, "loss": 0.5443, "step": 8595 }, { "epoch": 1.25, "grad_norm": 6.4134931564331055, "learning_rate": 1.3346721000662472e-06, "loss": 0.5622, "step": 8596 }, { "epoch": 1.25, "grad_norm": 6.673234462738037, "learning_rate": 1.3345229407152396e-06, "loss": 0.5802, "step": 8597 }, { "epoch": 1.25, "grad_norm": 6.585865497589111, "learning_rate": 1.3343737729833032e-06, "loss": 0.585, "step": 8598 }, { "epoch": 1.25, "grad_norm": 6.365670204162598, "learning_rate": 1.334224596874175e-06, "loss": 0.525, "step": 8599 }, { "epoch": 1.25, "grad_norm": 5.976195812225342, "learning_rate": 1.3340754123915927e-06, "loss": 0.5578, "step": 8600 }, { "epoch": 1.25, "grad_norm": 6.095615863800049, "learning_rate": 1.3339262195392942e-06, "loss": 0.5103, "step": 8601 }, { "epoch": 1.25, "grad_norm": 6.45170259475708, "learning_rate": 1.3337770183210167e-06, "loss": 0.6138, "step": 8602 }, { "epoch": 1.25, "grad_norm": 6.28248929977417, "learning_rate": 1.333627808740498e-06, "loss": 0.5362, "step": 8603 }, { "epoch": 1.25, "grad_norm": 6.115209579467773, "learning_rate": 1.3334785908014768e-06, "loss": 0.5257, "step": 8604 }, { "epoch": 1.25, "grad_norm": 6.317577838897705, "learning_rate": 1.3333293645076912e-06, "loss": 0.5239, "step": 8605 }, { "epoch": 1.25, "grad_norm": 5.888223171234131, "learning_rate": 1.3331801298628801e-06, "loss": 0.5476, "step": 8606 }, { "epoch": 1.25, "grad_norm": 7.241363048553467, "learning_rate": 1.3330308868707823e-06, "loss": 0.5097, "step": 8607 }, { "epoch": 1.25, "grad_norm": 6.783878803253174, "learning_rate": 1.332881635535137e-06, "loss": 0.5623, "step": 8608 }, { "epoch": 1.25, "grad_norm": 7.515369892120361, "learning_rate": 1.3327323758596827e-06, "loss": 0.5682, "step": 8609 }, { "epoch": 1.25, "grad_norm": 6.363761901855469, "learning_rate": 1.3325831078481595e-06, "loss": 0.562, "step": 8610 }, { "epoch": 1.25, "grad_norm": 6.728585720062256, "learning_rate": 1.3324338315043068e-06, "loss": 0.553, "step": 8611 }, { "epoch": 1.25, "grad_norm": 6.208942890167236, "learning_rate": 1.332284546831865e-06, "loss": 0.6176, "step": 8612 }, { "epoch": 1.25, "grad_norm": 6.116724014282227, "learning_rate": 1.3321352538345733e-06, "loss": 0.5301, "step": 8613 }, { "epoch": 1.25, "grad_norm": 7.141927242279053, "learning_rate": 1.3319859525161728e-06, "loss": 0.57, "step": 8614 }, { "epoch": 1.25, "grad_norm": 6.495441913604736, "learning_rate": 1.3318366428804035e-06, "loss": 0.5402, "step": 8615 }, { "epoch": 1.25, "grad_norm": 5.752162456512451, "learning_rate": 1.3316873249310066e-06, "loss": 0.5128, "step": 8616 }, { "epoch": 1.25, "grad_norm": 6.465684413909912, "learning_rate": 1.3315379986717226e-06, "loss": 0.5146, "step": 8617 }, { "epoch": 1.25, "grad_norm": 8.372540473937988, "learning_rate": 1.3313886641062925e-06, "loss": 0.6404, "step": 8618 }, { "epoch": 1.25, "grad_norm": 6.518949508666992, "learning_rate": 1.331239321238458e-06, "loss": 0.5632, "step": 8619 }, { "epoch": 1.25, "grad_norm": 6.364140510559082, "learning_rate": 1.3310899700719604e-06, "loss": 0.6259, "step": 8620 }, { "epoch": 1.25, "grad_norm": 5.770420074462891, "learning_rate": 1.3309406106105417e-06, "loss": 0.4792, "step": 8621 }, { "epoch": 1.25, "grad_norm": 7.058310031890869, "learning_rate": 1.3307912428579438e-06, "loss": 0.5602, "step": 8622 }, { "epoch": 1.25, "grad_norm": 7.319872856140137, "learning_rate": 1.3306418668179088e-06, "loss": 0.5416, "step": 8623 }, { "epoch": 1.25, "grad_norm": 6.4426774978637695, "learning_rate": 1.3304924824941788e-06, "loss": 0.4973, "step": 8624 }, { "epoch": 1.25, "grad_norm": 6.575377941131592, "learning_rate": 1.3303430898904969e-06, "loss": 0.5716, "step": 8625 }, { "epoch": 1.25, "grad_norm": 5.84700345993042, "learning_rate": 1.3301936890106053e-06, "loss": 0.5167, "step": 8626 }, { "epoch": 1.25, "grad_norm": 6.404903888702393, "learning_rate": 1.3300442798582476e-06, "loss": 0.5879, "step": 8627 }, { "epoch": 1.25, "grad_norm": 7.021148204803467, "learning_rate": 1.3298948624371666e-06, "loss": 0.5509, "step": 8628 }, { "epoch": 1.25, "grad_norm": 6.161064624786377, "learning_rate": 1.3297454367511063e-06, "loss": 0.5626, "step": 8629 }, { "epoch": 1.25, "grad_norm": 6.140016555786133, "learning_rate": 1.3295960028038093e-06, "loss": 0.5078, "step": 8630 }, { "epoch": 1.25, "grad_norm": 6.241817951202393, "learning_rate": 1.32944656059902e-06, "loss": 0.575, "step": 8631 }, { "epoch": 1.25, "grad_norm": 6.260481357574463, "learning_rate": 1.3292971101404828e-06, "loss": 0.6038, "step": 8632 }, { "epoch": 1.25, "grad_norm": 5.905740737915039, "learning_rate": 1.3291476514319416e-06, "loss": 0.5494, "step": 8633 }, { "epoch": 1.25, "grad_norm": 6.502418518066406, "learning_rate": 1.3289981844771403e-06, "loss": 0.5883, "step": 8634 }, { "epoch": 1.25, "grad_norm": 7.321013927459717, "learning_rate": 1.3288487092798244e-06, "loss": 0.5821, "step": 8635 }, { "epoch": 1.25, "grad_norm": 6.647543907165527, "learning_rate": 1.3286992258437383e-06, "loss": 0.5179, "step": 8636 }, { "epoch": 1.25, "grad_norm": 5.6189799308776855, "learning_rate": 1.3285497341726273e-06, "loss": 0.4949, "step": 8637 }, { "epoch": 1.25, "grad_norm": 6.421609878540039, "learning_rate": 1.3284002342702366e-06, "loss": 0.5621, "step": 8638 }, { "epoch": 1.25, "grad_norm": 6.970260143280029, "learning_rate": 1.3282507261403113e-06, "loss": 0.6167, "step": 8639 }, { "epoch": 1.25, "grad_norm": 6.720717430114746, "learning_rate": 1.3281012097865977e-06, "loss": 0.581, "step": 8640 }, { "epoch": 1.25, "grad_norm": 6.200062274932861, "learning_rate": 1.3279516852128412e-06, "loss": 0.539, "step": 8641 }, { "epoch": 1.25, "grad_norm": 6.466730117797852, "learning_rate": 1.3278021524227882e-06, "loss": 0.5758, "step": 8642 }, { "epoch": 1.25, "grad_norm": 6.339317798614502, "learning_rate": 1.3276526114201846e-06, "loss": 0.5286, "step": 8643 }, { "epoch": 1.25, "grad_norm": 5.66037654876709, "learning_rate": 1.327503062208778e-06, "loss": 0.4707, "step": 8644 }, { "epoch": 1.25, "grad_norm": 5.643578052520752, "learning_rate": 1.3273535047923137e-06, "loss": 0.5184, "step": 8645 }, { "epoch": 1.25, "grad_norm": 5.632147312164307, "learning_rate": 1.3272039391745394e-06, "loss": 0.5006, "step": 8646 }, { "epoch": 1.25, "grad_norm": 6.91608190536499, "learning_rate": 1.3270543653592018e-06, "loss": 0.5417, "step": 8647 }, { "epoch": 1.25, "grad_norm": 5.917779922485352, "learning_rate": 1.3269047833500487e-06, "loss": 0.4755, "step": 8648 }, { "epoch": 1.25, "grad_norm": 6.206212520599365, "learning_rate": 1.3267551931508273e-06, "loss": 0.5799, "step": 8649 }, { "epoch": 1.26, "grad_norm": 6.09740686416626, "learning_rate": 1.3266055947652857e-06, "loss": 0.5177, "step": 8650 }, { "epoch": 1.26, "grad_norm": 6.523914813995361, "learning_rate": 1.3264559881971712e-06, "loss": 0.5078, "step": 8651 }, { "epoch": 1.26, "grad_norm": 5.736111640930176, "learning_rate": 1.3263063734502326e-06, "loss": 0.5294, "step": 8652 }, { "epoch": 1.26, "grad_norm": 6.330262660980225, "learning_rate": 1.326156750528218e-06, "loss": 0.4899, "step": 8653 }, { "epoch": 1.26, "grad_norm": 5.764860153198242, "learning_rate": 1.3260071194348758e-06, "loss": 0.45, "step": 8654 }, { "epoch": 1.26, "grad_norm": 7.29411506652832, "learning_rate": 1.3258574801739552e-06, "loss": 0.5146, "step": 8655 }, { "epoch": 1.26, "grad_norm": 6.126056671142578, "learning_rate": 1.3257078327492046e-06, "loss": 0.5766, "step": 8656 }, { "epoch": 1.26, "grad_norm": 5.8121867179870605, "learning_rate": 1.325558177164374e-06, "loss": 0.5441, "step": 8657 }, { "epoch": 1.26, "grad_norm": 7.313930511474609, "learning_rate": 1.3254085134232119e-06, "loss": 0.5758, "step": 8658 }, { "epoch": 1.26, "grad_norm": 6.611668109893799, "learning_rate": 1.3252588415294681e-06, "loss": 0.5729, "step": 8659 }, { "epoch": 1.26, "grad_norm": 6.3615851402282715, "learning_rate": 1.3251091614868927e-06, "loss": 0.6032, "step": 8660 }, { "epoch": 1.26, "grad_norm": 5.953819751739502, "learning_rate": 1.3249594732992354e-06, "loss": 0.5926, "step": 8661 }, { "epoch": 1.26, "grad_norm": 5.6670074462890625, "learning_rate": 1.3248097769702465e-06, "loss": 0.5313, "step": 8662 }, { "epoch": 1.26, "grad_norm": 5.726655960083008, "learning_rate": 1.3246600725036765e-06, "loss": 0.5756, "step": 8663 }, { "epoch": 1.26, "grad_norm": 6.5917863845825195, "learning_rate": 1.3245103599032761e-06, "loss": 0.5434, "step": 8664 }, { "epoch": 1.26, "grad_norm": 5.747684478759766, "learning_rate": 1.3243606391727956e-06, "loss": 0.5461, "step": 8665 }, { "epoch": 1.26, "grad_norm": 6.1128973960876465, "learning_rate": 1.3242109103159864e-06, "loss": 0.5615, "step": 8666 }, { "epoch": 1.26, "grad_norm": 6.4264068603515625, "learning_rate": 1.3240611733365995e-06, "loss": 0.5904, "step": 8667 }, { "epoch": 1.26, "grad_norm": 6.225785255432129, "learning_rate": 1.3239114282383865e-06, "loss": 0.5904, "step": 8668 }, { "epoch": 1.26, "grad_norm": 7.584683418273926, "learning_rate": 1.323761675025099e-06, "loss": 0.6437, "step": 8669 }, { "epoch": 1.26, "grad_norm": 6.470776081085205, "learning_rate": 1.3236119137004887e-06, "loss": 0.5406, "step": 8670 }, { "epoch": 1.26, "grad_norm": 6.740689754486084, "learning_rate": 1.3234621442683079e-06, "loss": 0.5846, "step": 8671 }, { "epoch": 1.26, "grad_norm": 6.973162651062012, "learning_rate": 1.3233123667323088e-06, "loss": 0.5613, "step": 8672 }, { "epoch": 1.26, "grad_norm": 7.0016655921936035, "learning_rate": 1.3231625810962435e-06, "loss": 0.541, "step": 8673 }, { "epoch": 1.26, "grad_norm": 7.21013879776001, "learning_rate": 1.3230127873638646e-06, "loss": 0.6247, "step": 8674 }, { "epoch": 1.26, "grad_norm": 6.036802768707275, "learning_rate": 1.3228629855389253e-06, "loss": 0.5262, "step": 8675 }, { "epoch": 1.26, "grad_norm": 5.923150539398193, "learning_rate": 1.3227131756251782e-06, "loss": 0.5267, "step": 8676 }, { "epoch": 1.26, "grad_norm": 7.041107177734375, "learning_rate": 1.3225633576263775e-06, "loss": 0.5948, "step": 8677 }, { "epoch": 1.26, "grad_norm": 6.484447002410889, "learning_rate": 1.3224135315462754e-06, "loss": 0.5038, "step": 8678 }, { "epoch": 1.26, "grad_norm": 6.589084148406982, "learning_rate": 1.3222636973886263e-06, "loss": 0.5353, "step": 8679 }, { "epoch": 1.26, "grad_norm": 7.7154436111450195, "learning_rate": 1.3221138551571839e-06, "loss": 0.6004, "step": 8680 }, { "epoch": 1.26, "grad_norm": 6.674683570861816, "learning_rate": 1.3219640048557024e-06, "loss": 0.5277, "step": 8681 }, { "epoch": 1.26, "grad_norm": 6.233592987060547, "learning_rate": 1.3218141464879357e-06, "loss": 0.5423, "step": 8682 }, { "epoch": 1.26, "grad_norm": 6.1336493492126465, "learning_rate": 1.3216642800576385e-06, "loss": 0.4703, "step": 8683 }, { "epoch": 1.26, "grad_norm": 6.200333118438721, "learning_rate": 1.3215144055685654e-06, "loss": 0.577, "step": 8684 }, { "epoch": 1.26, "grad_norm": 6.669319152832031, "learning_rate": 1.3213645230244718e-06, "loss": 0.5302, "step": 8685 }, { "epoch": 1.26, "grad_norm": 6.580646514892578, "learning_rate": 1.3212146324291116e-06, "loss": 0.5152, "step": 8686 }, { "epoch": 1.26, "grad_norm": 6.019633769989014, "learning_rate": 1.3210647337862408e-06, "loss": 0.5285, "step": 8687 }, { "epoch": 1.26, "grad_norm": 7.606006145477295, "learning_rate": 1.320914827099615e-06, "loss": 0.5711, "step": 8688 }, { "epoch": 1.26, "grad_norm": 6.54686164855957, "learning_rate": 1.3207649123729894e-06, "loss": 0.5596, "step": 8689 }, { "epoch": 1.26, "grad_norm": 5.920634746551514, "learning_rate": 1.32061498961012e-06, "loss": 0.5028, "step": 8690 }, { "epoch": 1.26, "grad_norm": 6.729165077209473, "learning_rate": 1.3204650588147633e-06, "loss": 0.5636, "step": 8691 }, { "epoch": 1.26, "grad_norm": 5.6488237380981445, "learning_rate": 1.3203151199906753e-06, "loss": 0.5152, "step": 8692 }, { "epoch": 1.26, "grad_norm": 7.716064929962158, "learning_rate": 1.3201651731416121e-06, "loss": 0.6402, "step": 8693 }, { "epoch": 1.26, "grad_norm": 5.969008922576904, "learning_rate": 1.320015218271331e-06, "loss": 0.5226, "step": 8694 }, { "epoch": 1.26, "grad_norm": 6.680893421173096, "learning_rate": 1.3198652553835883e-06, "loss": 0.4846, "step": 8695 }, { "epoch": 1.26, "grad_norm": 7.584376811981201, "learning_rate": 1.3197152844821413e-06, "loss": 0.6324, "step": 8696 }, { "epoch": 1.26, "grad_norm": 8.567360877990723, "learning_rate": 1.3195653055707476e-06, "loss": 0.6071, "step": 8697 }, { "epoch": 1.26, "grad_norm": 6.760292053222656, "learning_rate": 1.3194153186531642e-06, "loss": 0.5633, "step": 8698 }, { "epoch": 1.26, "grad_norm": 5.796581745147705, "learning_rate": 1.3192653237331493e-06, "loss": 0.4824, "step": 8699 }, { "epoch": 1.26, "grad_norm": 6.558289527893066, "learning_rate": 1.3191153208144604e-06, "loss": 0.5444, "step": 8700 }, { "epoch": 1.26, "grad_norm": 6.7173871994018555, "learning_rate": 1.3189653099008554e-06, "loss": 0.5784, "step": 8701 }, { "epoch": 1.26, "grad_norm": 5.808367729187012, "learning_rate": 1.3188152909960927e-06, "loss": 0.5981, "step": 8702 }, { "epoch": 1.26, "grad_norm": 5.920062065124512, "learning_rate": 1.3186652641039311e-06, "loss": 0.5879, "step": 8703 }, { "epoch": 1.26, "grad_norm": 7.260566711425781, "learning_rate": 1.3185152292281289e-06, "loss": 0.513, "step": 8704 }, { "epoch": 1.26, "grad_norm": 6.162166118621826, "learning_rate": 1.318365186372446e-06, "loss": 0.5923, "step": 8705 }, { "epoch": 1.26, "grad_norm": 6.02237606048584, "learning_rate": 1.3182151355406396e-06, "loss": 0.5528, "step": 8706 }, { "epoch": 1.26, "grad_norm": 6.359384059906006, "learning_rate": 1.3180650767364707e-06, "loss": 0.4971, "step": 8707 }, { "epoch": 1.26, "grad_norm": 5.57103967666626, "learning_rate": 1.3179150099636978e-06, "loss": 0.4757, "step": 8708 }, { "epoch": 1.26, "grad_norm": 6.283668041229248, "learning_rate": 1.3177649352260808e-06, "loss": 0.5605, "step": 8709 }, { "epoch": 1.26, "grad_norm": 6.524988174438477, "learning_rate": 1.3176148525273798e-06, "loss": 0.5789, "step": 8710 }, { "epoch": 1.26, "grad_norm": 6.23171854019165, "learning_rate": 1.3174647618713547e-06, "loss": 0.5006, "step": 8711 }, { "epoch": 1.26, "grad_norm": 6.388073921203613, "learning_rate": 1.317314663261766e-06, "loss": 0.4655, "step": 8712 }, { "epoch": 1.26, "grad_norm": 6.361271858215332, "learning_rate": 1.317164556702374e-06, "loss": 0.5331, "step": 8713 }, { "epoch": 1.26, "grad_norm": 5.993545055389404, "learning_rate": 1.3170144421969391e-06, "loss": 0.6146, "step": 8714 }, { "epoch": 1.26, "grad_norm": 6.576188087463379, "learning_rate": 1.3168643197492226e-06, "loss": 0.5755, "step": 8715 }, { "epoch": 1.26, "grad_norm": 6.862825870513916, "learning_rate": 1.3167141893629853e-06, "loss": 0.5959, "step": 8716 }, { "epoch": 1.26, "grad_norm": 6.249495029449463, "learning_rate": 1.3165640510419886e-06, "loss": 0.4786, "step": 8717 }, { "epoch": 1.26, "grad_norm": 7.57574987411499, "learning_rate": 1.316413904789994e-06, "loss": 0.6142, "step": 8718 }, { "epoch": 1.27, "grad_norm": 6.075758934020996, "learning_rate": 1.3162637506107633e-06, "loss": 0.5631, "step": 8719 }, { "epoch": 1.27, "grad_norm": 5.58536434173584, "learning_rate": 1.3161135885080583e-06, "loss": 0.5252, "step": 8720 }, { "epoch": 1.27, "grad_norm": 6.046807289123535, "learning_rate": 1.3159634184856407e-06, "loss": 0.5679, "step": 8721 }, { "epoch": 1.27, "grad_norm": 6.511199951171875, "learning_rate": 1.315813240547273e-06, "loss": 0.5552, "step": 8722 }, { "epoch": 1.27, "grad_norm": 6.959301471710205, "learning_rate": 1.3156630546967178e-06, "loss": 0.5614, "step": 8723 }, { "epoch": 1.27, "grad_norm": 6.242414951324463, "learning_rate": 1.3155128609377375e-06, "loss": 0.5598, "step": 8724 }, { "epoch": 1.27, "grad_norm": 6.242105484008789, "learning_rate": 1.3153626592740953e-06, "loss": 0.5897, "step": 8725 }, { "epoch": 1.27, "grad_norm": 6.503330707550049, "learning_rate": 1.315212449709554e-06, "loss": 0.4618, "step": 8726 }, { "epoch": 1.27, "grad_norm": 7.7820658683776855, "learning_rate": 1.3150622322478773e-06, "loss": 0.6423, "step": 8727 }, { "epoch": 1.27, "grad_norm": 6.163993835449219, "learning_rate": 1.3149120068928278e-06, "loss": 0.5406, "step": 8728 }, { "epoch": 1.27, "grad_norm": 6.349727630615234, "learning_rate": 1.3147617736481698e-06, "loss": 0.5123, "step": 8729 }, { "epoch": 1.27, "grad_norm": 6.13210916519165, "learning_rate": 1.314611532517667e-06, "loss": 0.473, "step": 8730 }, { "epoch": 1.27, "grad_norm": 5.675718307495117, "learning_rate": 1.3144612835050836e-06, "loss": 0.5057, "step": 8731 }, { "epoch": 1.27, "grad_norm": 5.96416711807251, "learning_rate": 1.3143110266141832e-06, "loss": 0.5614, "step": 8732 }, { "epoch": 1.27, "grad_norm": 7.014461994171143, "learning_rate": 1.3141607618487314e-06, "loss": 0.603, "step": 8733 }, { "epoch": 1.27, "grad_norm": 7.024354457855225, "learning_rate": 1.3140104892124918e-06, "loss": 0.517, "step": 8734 }, { "epoch": 1.27, "grad_norm": 6.943045616149902, "learning_rate": 1.3138602087092297e-06, "loss": 0.5019, "step": 8735 }, { "epoch": 1.27, "grad_norm": 6.4716291427612305, "learning_rate": 1.31370992034271e-06, "loss": 0.5532, "step": 8736 }, { "epoch": 1.27, "grad_norm": 6.418993949890137, "learning_rate": 1.313559624116698e-06, "loss": 0.5637, "step": 8737 }, { "epoch": 1.27, "grad_norm": 5.524827480316162, "learning_rate": 1.3134093200349589e-06, "loss": 0.513, "step": 8738 }, { "epoch": 1.27, "grad_norm": 6.756519794464111, "learning_rate": 1.3132590081012587e-06, "loss": 0.6253, "step": 8739 }, { "epoch": 1.27, "grad_norm": 6.521120071411133, "learning_rate": 1.313108688319363e-06, "loss": 0.527, "step": 8740 }, { "epoch": 1.27, "grad_norm": 5.676196098327637, "learning_rate": 1.312958360693038e-06, "loss": 0.543, "step": 8741 }, { "epoch": 1.27, "grad_norm": 7.916275501251221, "learning_rate": 1.3128080252260497e-06, "loss": 0.636, "step": 8742 }, { "epoch": 1.27, "grad_norm": 7.573893070220947, "learning_rate": 1.3126576819221646e-06, "loss": 0.5529, "step": 8743 }, { "epoch": 1.27, "grad_norm": 5.709453105926514, "learning_rate": 1.3125073307851494e-06, "loss": 0.5256, "step": 8744 }, { "epoch": 1.27, "grad_norm": 6.3918070793151855, "learning_rate": 1.3123569718187706e-06, "loss": 0.4978, "step": 8745 }, { "epoch": 1.27, "grad_norm": 6.549385070800781, "learning_rate": 1.3122066050267955e-06, "loss": 0.4611, "step": 8746 }, { "epoch": 1.27, "grad_norm": 5.902108669281006, "learning_rate": 1.312056230412991e-06, "loss": 0.4343, "step": 8747 }, { "epoch": 1.27, "grad_norm": 6.289979457855225, "learning_rate": 1.311905847981125e-06, "loss": 0.496, "step": 8748 }, { "epoch": 1.27, "grad_norm": 6.191089630126953, "learning_rate": 1.3117554577349645e-06, "loss": 0.5461, "step": 8749 }, { "epoch": 1.27, "grad_norm": 6.401366710662842, "learning_rate": 1.3116050596782776e-06, "loss": 0.5716, "step": 8750 }, { "epoch": 1.27, "grad_norm": 6.856176853179932, "learning_rate": 1.3114546538148324e-06, "loss": 0.5663, "step": 8751 }, { "epoch": 1.27, "grad_norm": 6.699294090270996, "learning_rate": 1.311304240148397e-06, "loss": 0.5108, "step": 8752 }, { "epoch": 1.27, "grad_norm": 6.946615219116211, "learning_rate": 1.3111538186827393e-06, "loss": 0.6437, "step": 8753 }, { "epoch": 1.27, "grad_norm": 5.9393415451049805, "learning_rate": 1.3110033894216284e-06, "loss": 0.542, "step": 8754 }, { "epoch": 1.27, "grad_norm": 6.595680236816406, "learning_rate": 1.310852952368833e-06, "loss": 0.5322, "step": 8755 }, { "epoch": 1.27, "grad_norm": 7.004356384277344, "learning_rate": 1.3107025075281218e-06, "loss": 0.6591, "step": 8756 }, { "epoch": 1.27, "grad_norm": 6.5440216064453125, "learning_rate": 1.3105520549032642e-06, "loss": 0.5004, "step": 8757 }, { "epoch": 1.27, "grad_norm": 5.465989112854004, "learning_rate": 1.3104015944980296e-06, "loss": 0.4412, "step": 8758 }, { "epoch": 1.27, "grad_norm": 6.754043102264404, "learning_rate": 1.3102511263161872e-06, "loss": 0.4894, "step": 8759 }, { "epoch": 1.27, "grad_norm": 6.38107967376709, "learning_rate": 1.3101006503615069e-06, "loss": 0.5692, "step": 8760 }, { "epoch": 1.27, "grad_norm": 6.860391139984131, "learning_rate": 1.3099501666377589e-06, "loss": 0.58, "step": 8761 }, { "epoch": 1.27, "grad_norm": 5.830982208251953, "learning_rate": 1.309799675148713e-06, "loss": 0.48, "step": 8762 }, { "epoch": 1.27, "grad_norm": 7.028561115264893, "learning_rate": 1.3096491758981394e-06, "loss": 0.5511, "step": 8763 }, { "epoch": 1.27, "grad_norm": 7.399848937988281, "learning_rate": 1.3094986688898091e-06, "loss": 0.5425, "step": 8764 }, { "epoch": 1.27, "grad_norm": 5.914048194885254, "learning_rate": 1.3093481541274925e-06, "loss": 0.5235, "step": 8765 }, { "epoch": 1.27, "grad_norm": 6.5060014724731445, "learning_rate": 1.3091976316149604e-06, "loss": 0.6235, "step": 8766 }, { "epoch": 1.27, "grad_norm": 6.301280975341797, "learning_rate": 1.309047101355984e-06, "loss": 0.5153, "step": 8767 }, { "epoch": 1.27, "grad_norm": 6.05374813079834, "learning_rate": 1.308896563354335e-06, "loss": 0.5086, "step": 8768 }, { "epoch": 1.27, "grad_norm": 6.621715068817139, "learning_rate": 1.308746017613784e-06, "loss": 0.4967, "step": 8769 }, { "epoch": 1.27, "grad_norm": 6.497140884399414, "learning_rate": 1.3085954641381035e-06, "loss": 0.5786, "step": 8770 }, { "epoch": 1.27, "grad_norm": 6.4855122566223145, "learning_rate": 1.308444902931065e-06, "loss": 0.5683, "step": 8771 }, { "epoch": 1.27, "grad_norm": 6.867970943450928, "learning_rate": 1.3082943339964408e-06, "loss": 0.4989, "step": 8772 }, { "epoch": 1.27, "grad_norm": 6.496945858001709, "learning_rate": 1.3081437573380026e-06, "loss": 0.6037, "step": 8773 }, { "epoch": 1.27, "grad_norm": 6.61264705657959, "learning_rate": 1.3079931729595232e-06, "loss": 0.5151, "step": 8774 }, { "epoch": 1.27, "grad_norm": 6.762310981750488, "learning_rate": 1.3078425808647756e-06, "loss": 0.5939, "step": 8775 }, { "epoch": 1.27, "grad_norm": 6.073391914367676, "learning_rate": 1.3076919810575326e-06, "loss": 0.5439, "step": 8776 }, { "epoch": 1.27, "grad_norm": 6.551926612854004, "learning_rate": 1.3075413735415666e-06, "loss": 0.5532, "step": 8777 }, { "epoch": 1.27, "grad_norm": 6.650381565093994, "learning_rate": 1.3073907583206512e-06, "loss": 0.5085, "step": 8778 }, { "epoch": 1.27, "grad_norm": 6.636277198791504, "learning_rate": 1.3072401353985597e-06, "loss": 0.5893, "step": 8779 }, { "epoch": 1.27, "grad_norm": 6.532528400421143, "learning_rate": 1.307089504779066e-06, "loss": 0.5567, "step": 8780 }, { "epoch": 1.27, "grad_norm": 6.52390718460083, "learning_rate": 1.3069388664659434e-06, "loss": 0.5564, "step": 8781 }, { "epoch": 1.27, "grad_norm": 6.508464813232422, "learning_rate": 1.3067882204629666e-06, "loss": 0.5525, "step": 8782 }, { "epoch": 1.27, "grad_norm": 6.146102428436279, "learning_rate": 1.3066375667739092e-06, "loss": 0.553, "step": 8783 }, { "epoch": 1.27, "grad_norm": 6.737680435180664, "learning_rate": 1.306486905402546e-06, "loss": 0.5586, "step": 8784 }, { "epoch": 1.27, "grad_norm": 6.201819896697998, "learning_rate": 1.3063362363526512e-06, "loss": 0.513, "step": 8785 }, { "epoch": 1.27, "grad_norm": 6.204232215881348, "learning_rate": 1.3061855596279999e-06, "loss": 0.5785, "step": 8786 }, { "epoch": 1.27, "grad_norm": 6.650699615478516, "learning_rate": 1.3060348752323666e-06, "loss": 0.5579, "step": 8787 }, { "epoch": 1.28, "grad_norm": 6.515753746032715, "learning_rate": 1.305884183169527e-06, "loss": 0.5314, "step": 8788 }, { "epoch": 1.28, "grad_norm": 6.349321365356445, "learning_rate": 1.3057334834432564e-06, "loss": 0.5248, "step": 8789 }, { "epoch": 1.28, "grad_norm": 6.643898963928223, "learning_rate": 1.30558277605733e-06, "loss": 0.5733, "step": 8790 }, { "epoch": 1.28, "grad_norm": 6.272275447845459, "learning_rate": 1.3054320610155233e-06, "loss": 0.4655, "step": 8791 }, { "epoch": 1.28, "grad_norm": 6.229631423950195, "learning_rate": 1.305281338321613e-06, "loss": 0.5622, "step": 8792 }, { "epoch": 1.28, "grad_norm": 5.959776878356934, "learning_rate": 1.3051306079793746e-06, "loss": 0.5465, "step": 8793 }, { "epoch": 1.28, "grad_norm": 6.633268356323242, "learning_rate": 1.3049798699925847e-06, "loss": 0.4995, "step": 8794 }, { "epoch": 1.28, "grad_norm": 6.6096086502075195, "learning_rate": 1.3048291243650195e-06, "loss": 0.5815, "step": 8795 }, { "epoch": 1.28, "grad_norm": 6.283664226531982, "learning_rate": 1.3046783711004564e-06, "loss": 0.5859, "step": 8796 }, { "epoch": 1.28, "grad_norm": 6.30371618270874, "learning_rate": 1.3045276102026712e-06, "loss": 0.5597, "step": 8797 }, { "epoch": 1.28, "grad_norm": 6.701570987701416, "learning_rate": 1.3043768416754418e-06, "loss": 0.5042, "step": 8798 }, { "epoch": 1.28, "grad_norm": 8.022608757019043, "learning_rate": 1.304226065522545e-06, "loss": 0.564, "step": 8799 }, { "epoch": 1.28, "grad_norm": 7.297051906585693, "learning_rate": 1.3040752817477587e-06, "loss": 0.6955, "step": 8800 }, { "epoch": 1.28, "grad_norm": 6.554471492767334, "learning_rate": 1.30392449035486e-06, "loss": 0.497, "step": 8801 }, { "epoch": 1.28, "grad_norm": 5.7017598152160645, "learning_rate": 1.303773691347627e-06, "loss": 0.5639, "step": 8802 }, { "epoch": 1.28, "grad_norm": 6.704639434814453, "learning_rate": 1.303622884729838e-06, "loss": 0.632, "step": 8803 }, { "epoch": 1.28, "grad_norm": 6.092327117919922, "learning_rate": 1.3034720705052712e-06, "loss": 0.4479, "step": 8804 }, { "epoch": 1.28, "grad_norm": 6.094475269317627, "learning_rate": 1.3033212486777046e-06, "loss": 0.496, "step": 8805 }, { "epoch": 1.28, "grad_norm": 6.597798824310303, "learning_rate": 1.3031704192509168e-06, "loss": 0.6217, "step": 8806 }, { "epoch": 1.28, "grad_norm": 7.848248481750488, "learning_rate": 1.3030195822286866e-06, "loss": 0.5917, "step": 8807 }, { "epoch": 1.28, "grad_norm": 7.052003383636475, "learning_rate": 1.3028687376147929e-06, "loss": 0.5339, "step": 8808 }, { "epoch": 1.28, "grad_norm": 5.816452980041504, "learning_rate": 1.3027178854130156e-06, "loss": 0.5817, "step": 8809 }, { "epoch": 1.28, "grad_norm": 6.984280586242676, "learning_rate": 1.3025670256271334e-06, "loss": 0.5189, "step": 8810 }, { "epoch": 1.28, "grad_norm": 6.857644081115723, "learning_rate": 1.3024161582609261e-06, "loss": 0.5217, "step": 8811 }, { "epoch": 1.28, "grad_norm": 6.6985626220703125, "learning_rate": 1.302265283318173e-06, "loss": 0.62, "step": 8812 }, { "epoch": 1.28, "grad_norm": 6.011447906494141, "learning_rate": 1.3021144008026547e-06, "loss": 0.524, "step": 8813 }, { "epoch": 1.28, "grad_norm": 6.834537029266357, "learning_rate": 1.301963510718151e-06, "loss": 0.5588, "step": 8814 }, { "epoch": 1.28, "grad_norm": 5.673475742340088, "learning_rate": 1.3018126130684418e-06, "loss": 0.4737, "step": 8815 }, { "epoch": 1.28, "grad_norm": 7.123545169830322, "learning_rate": 1.3016617078573082e-06, "loss": 0.4893, "step": 8816 }, { "epoch": 1.28, "grad_norm": 6.248000621795654, "learning_rate": 1.3015107950885309e-06, "loss": 0.5728, "step": 8817 }, { "epoch": 1.28, "grad_norm": 5.974113464355469, "learning_rate": 1.3013598747658902e-06, "loss": 0.5519, "step": 8818 }, { "epoch": 1.28, "grad_norm": 6.449129581451416, "learning_rate": 1.3012089468931676e-06, "loss": 0.5951, "step": 8819 }, { "epoch": 1.28, "grad_norm": 6.40690279006958, "learning_rate": 1.3010580114741442e-06, "loss": 0.5389, "step": 8820 }, { "epoch": 1.28, "grad_norm": 6.7273850440979, "learning_rate": 1.3009070685126016e-06, "loss": 0.597, "step": 8821 }, { "epoch": 1.28, "grad_norm": 6.709827423095703, "learning_rate": 1.3007561180123211e-06, "loss": 0.5663, "step": 8822 }, { "epoch": 1.28, "grad_norm": 6.662168979644775, "learning_rate": 1.3006051599770852e-06, "loss": 0.5661, "step": 8823 }, { "epoch": 1.28, "grad_norm": 6.310754299163818, "learning_rate": 1.3004541944106752e-06, "loss": 0.4895, "step": 8824 }, { "epoch": 1.28, "grad_norm": 5.75963830947876, "learning_rate": 1.3003032213168733e-06, "loss": 0.473, "step": 8825 }, { "epoch": 1.28, "grad_norm": 5.869238376617432, "learning_rate": 1.3001522406994625e-06, "loss": 0.5329, "step": 8826 }, { "epoch": 1.28, "grad_norm": 5.9402947425842285, "learning_rate": 1.3000012525622248e-06, "loss": 0.5187, "step": 8827 }, { "epoch": 1.28, "grad_norm": 5.751946926116943, "learning_rate": 1.299850256908943e-06, "loss": 0.5127, "step": 8828 }, { "epoch": 1.28, "grad_norm": 6.7388834953308105, "learning_rate": 1.2996992537434003e-06, "loss": 0.5261, "step": 8829 }, { "epoch": 1.28, "grad_norm": 7.563799858093262, "learning_rate": 1.29954824306938e-06, "loss": 0.6105, "step": 8830 }, { "epoch": 1.28, "grad_norm": 6.291154384613037, "learning_rate": 1.299397224890665e-06, "loss": 0.5316, "step": 8831 }, { "epoch": 1.28, "grad_norm": 8.070441246032715, "learning_rate": 1.2992461992110393e-06, "loss": 0.5668, "step": 8832 }, { "epoch": 1.28, "grad_norm": 6.942329406738281, "learning_rate": 1.2990951660342858e-06, "loss": 0.5418, "step": 8833 }, { "epoch": 1.28, "grad_norm": 6.227302074432373, "learning_rate": 1.2989441253641893e-06, "loss": 0.4714, "step": 8834 }, { "epoch": 1.28, "grad_norm": 6.174405097961426, "learning_rate": 1.2987930772045332e-06, "loss": 0.5099, "step": 8835 }, { "epoch": 1.28, "grad_norm": 6.350735664367676, "learning_rate": 1.2986420215591021e-06, "loss": 0.5365, "step": 8836 }, { "epoch": 1.28, "grad_norm": 7.491817951202393, "learning_rate": 1.2984909584316808e-06, "loss": 0.6321, "step": 8837 }, { "epoch": 1.28, "grad_norm": 7.0196051597595215, "learning_rate": 1.2983398878260529e-06, "loss": 0.574, "step": 8838 }, { "epoch": 1.28, "grad_norm": 6.68887186050415, "learning_rate": 1.298188809746004e-06, "loss": 0.4955, "step": 8839 }, { "epoch": 1.28, "grad_norm": 6.038766384124756, "learning_rate": 1.2980377241953194e-06, "loss": 0.5127, "step": 8840 }, { "epoch": 1.28, "grad_norm": 6.250296115875244, "learning_rate": 1.2978866311777833e-06, "loss": 0.5328, "step": 8841 }, { "epoch": 1.28, "grad_norm": 7.005230903625488, "learning_rate": 1.2977355306971822e-06, "loss": 0.6541, "step": 8842 }, { "epoch": 1.28, "grad_norm": 6.420970916748047, "learning_rate": 1.2975844227573007e-06, "loss": 0.5351, "step": 8843 }, { "epoch": 1.28, "grad_norm": 8.17007064819336, "learning_rate": 1.2974333073619254e-06, "loss": 0.671, "step": 8844 }, { "epoch": 1.28, "grad_norm": 6.426352500915527, "learning_rate": 1.2972821845148419e-06, "loss": 0.5681, "step": 8845 }, { "epoch": 1.28, "grad_norm": 6.749971866607666, "learning_rate": 1.297131054219836e-06, "loss": 0.5784, "step": 8846 }, { "epoch": 1.28, "grad_norm": 5.885578632354736, "learning_rate": 1.2969799164806944e-06, "loss": 0.536, "step": 8847 }, { "epoch": 1.28, "grad_norm": 5.996361255645752, "learning_rate": 1.2968287713012035e-06, "loss": 0.5263, "step": 8848 }, { "epoch": 1.28, "grad_norm": 5.7087788581848145, "learning_rate": 1.29667761868515e-06, "loss": 0.558, "step": 8849 }, { "epoch": 1.28, "grad_norm": 6.818168640136719, "learning_rate": 1.2965264586363208e-06, "loss": 0.5342, "step": 8850 }, { "epoch": 1.28, "grad_norm": 6.255831718444824, "learning_rate": 1.2963752911585034e-06, "loss": 0.5705, "step": 8851 }, { "epoch": 1.28, "grad_norm": 7.067400932312012, "learning_rate": 1.2962241162554844e-06, "loss": 0.5645, "step": 8852 }, { "epoch": 1.28, "grad_norm": 6.45482063293457, "learning_rate": 1.2960729339310513e-06, "loss": 0.5243, "step": 8853 }, { "epoch": 1.28, "grad_norm": 6.402726173400879, "learning_rate": 1.295921744188992e-06, "loss": 0.4697, "step": 8854 }, { "epoch": 1.28, "grad_norm": 6.697022438049316, "learning_rate": 1.2957705470330944e-06, "loss": 0.536, "step": 8855 }, { "epoch": 1.28, "grad_norm": 6.577726364135742, "learning_rate": 1.295619342467146e-06, "loss": 0.5302, "step": 8856 }, { "epoch": 1.29, "grad_norm": 6.278139591217041, "learning_rate": 1.2954681304949355e-06, "loss": 0.5568, "step": 8857 }, { "epoch": 1.29, "grad_norm": 5.764012813568115, "learning_rate": 1.2953169111202509e-06, "loss": 0.4897, "step": 8858 }, { "epoch": 1.29, "grad_norm": 6.039440155029297, "learning_rate": 1.2951656843468813e-06, "loss": 0.5962, "step": 8859 }, { "epoch": 1.29, "grad_norm": 6.523921012878418, "learning_rate": 1.2950144501786147e-06, "loss": 0.5369, "step": 8860 }, { "epoch": 1.29, "grad_norm": 7.040672779083252, "learning_rate": 1.2948632086192404e-06, "loss": 0.5712, "step": 8861 }, { "epoch": 1.29, "grad_norm": 6.216028213500977, "learning_rate": 1.2947119596725477e-06, "loss": 0.5508, "step": 8862 }, { "epoch": 1.29, "grad_norm": 6.325887680053711, "learning_rate": 1.2945607033423255e-06, "loss": 0.5141, "step": 8863 }, { "epoch": 1.29, "grad_norm": 5.759422779083252, "learning_rate": 1.2944094396323638e-06, "loss": 0.4612, "step": 8864 }, { "epoch": 1.29, "grad_norm": 7.501740455627441, "learning_rate": 1.294258168546452e-06, "loss": 0.5321, "step": 8865 }, { "epoch": 1.29, "grad_norm": 7.04349946975708, "learning_rate": 1.2941068900883797e-06, "loss": 0.5699, "step": 8866 }, { "epoch": 1.29, "grad_norm": 5.746974468231201, "learning_rate": 1.293955604261937e-06, "loss": 0.4899, "step": 8867 }, { "epoch": 1.29, "grad_norm": 5.775261878967285, "learning_rate": 1.2938043110709145e-06, "loss": 0.4844, "step": 8868 }, { "epoch": 1.29, "grad_norm": 7.13498067855835, "learning_rate": 1.2936530105191022e-06, "loss": 0.5329, "step": 8869 }, { "epoch": 1.29, "grad_norm": 7.356235027313232, "learning_rate": 1.2935017026102909e-06, "loss": 0.5262, "step": 8870 }, { "epoch": 1.29, "grad_norm": 5.656938076019287, "learning_rate": 1.2933503873482715e-06, "loss": 0.5259, "step": 8871 }, { "epoch": 1.29, "grad_norm": 6.248610973358154, "learning_rate": 1.2931990647368345e-06, "loss": 0.5663, "step": 8872 }, { "epoch": 1.29, "grad_norm": 7.724830150604248, "learning_rate": 1.2930477347797716e-06, "loss": 0.5765, "step": 8873 }, { "epoch": 1.29, "grad_norm": 6.491677284240723, "learning_rate": 1.2928963974808738e-06, "loss": 0.5253, "step": 8874 }, { "epoch": 1.29, "grad_norm": 6.225951194763184, "learning_rate": 1.2927450528439325e-06, "loss": 0.5663, "step": 8875 }, { "epoch": 1.29, "grad_norm": 7.007562637329102, "learning_rate": 1.2925937008727397e-06, "loss": 0.5861, "step": 8876 }, { "epoch": 1.29, "grad_norm": 6.095150947570801, "learning_rate": 1.2924423415710873e-06, "loss": 0.5223, "step": 8877 }, { "epoch": 1.29, "grad_norm": 6.347977161407471, "learning_rate": 1.292290974942767e-06, "loss": 0.5329, "step": 8878 }, { "epoch": 1.29, "grad_norm": 6.303990364074707, "learning_rate": 1.292139600991571e-06, "loss": 0.5771, "step": 8879 }, { "epoch": 1.29, "grad_norm": 6.304628372192383, "learning_rate": 1.2919882197212926e-06, "loss": 0.5351, "step": 8880 }, { "epoch": 1.29, "grad_norm": 5.76704216003418, "learning_rate": 1.2918368311357232e-06, "loss": 0.5142, "step": 8881 }, { "epoch": 1.29, "grad_norm": 6.131785869598389, "learning_rate": 1.2916854352386565e-06, "loss": 0.5128, "step": 8882 }, { "epoch": 1.29, "grad_norm": 6.301549911499023, "learning_rate": 1.291534032033885e-06, "loss": 0.5297, "step": 8883 }, { "epoch": 1.29, "grad_norm": 6.166223526000977, "learning_rate": 1.291382621525202e-06, "loss": 0.5727, "step": 8884 }, { "epoch": 1.29, "grad_norm": 6.418377876281738, "learning_rate": 1.2912312037164008e-06, "loss": 0.6395, "step": 8885 }, { "epoch": 1.29, "grad_norm": 5.673855304718018, "learning_rate": 1.2910797786112752e-06, "loss": 0.4776, "step": 8886 }, { "epoch": 1.29, "grad_norm": 5.793074607849121, "learning_rate": 1.2909283462136187e-06, "loss": 0.5041, "step": 8887 }, { "epoch": 1.29, "grad_norm": 8.129451751708984, "learning_rate": 1.2907769065272251e-06, "loss": 0.5544, "step": 8888 }, { "epoch": 1.29, "grad_norm": 5.9702863693237305, "learning_rate": 1.2906254595558888e-06, "loss": 0.5305, "step": 8889 }, { "epoch": 1.29, "grad_norm": 6.401578903198242, "learning_rate": 1.2904740053034036e-06, "loss": 0.5544, "step": 8890 }, { "epoch": 1.29, "grad_norm": 6.395144462585449, "learning_rate": 1.2903225437735642e-06, "loss": 0.596, "step": 8891 }, { "epoch": 1.29, "grad_norm": 6.631081581115723, "learning_rate": 1.2901710749701651e-06, "loss": 0.4933, "step": 8892 }, { "epoch": 1.29, "grad_norm": 5.493551731109619, "learning_rate": 1.2900195988970012e-06, "loss": 0.5077, "step": 8893 }, { "epoch": 1.29, "grad_norm": 5.837223529815674, "learning_rate": 1.2898681155578677e-06, "loss": 0.5214, "step": 8894 }, { "epoch": 1.29, "grad_norm": 7.107722282409668, "learning_rate": 1.2897166249565595e-06, "loss": 0.5474, "step": 8895 }, { "epoch": 1.29, "grad_norm": 6.756750106811523, "learning_rate": 1.2895651270968717e-06, "loss": 0.5153, "step": 8896 }, { "epoch": 1.29, "grad_norm": 6.286183834075928, "learning_rate": 1.2894136219826004e-06, "loss": 0.5317, "step": 8897 }, { "epoch": 1.29, "grad_norm": 5.8714375495910645, "learning_rate": 1.2892621096175412e-06, "loss": 0.5044, "step": 8898 }, { "epoch": 1.29, "grad_norm": 6.356095790863037, "learning_rate": 1.2891105900054896e-06, "loss": 0.4847, "step": 8899 }, { "epoch": 1.29, "grad_norm": 7.2414445877075195, "learning_rate": 1.2889590631502417e-06, "loss": 0.5806, "step": 8900 }, { "epoch": 1.29, "grad_norm": 6.779908657073975, "learning_rate": 1.2888075290555947e-06, "loss": 0.5916, "step": 8901 }, { "epoch": 1.29, "grad_norm": 6.293110370635986, "learning_rate": 1.2886559877253435e-06, "loss": 0.5306, "step": 8902 }, { "epoch": 1.29, "grad_norm": 6.874567985534668, "learning_rate": 1.2885044391632863e-06, "loss": 0.4944, "step": 8903 }, { "epoch": 1.29, "grad_norm": 6.148439884185791, "learning_rate": 1.2883528833732185e-06, "loss": 0.5181, "step": 8904 }, { "epoch": 1.29, "grad_norm": 6.876068115234375, "learning_rate": 1.288201320358938e-06, "loss": 0.5199, "step": 8905 }, { "epoch": 1.29, "grad_norm": 6.299640655517578, "learning_rate": 1.2880497501242418e-06, "loss": 0.5436, "step": 8906 }, { "epoch": 1.29, "grad_norm": 6.3255791664123535, "learning_rate": 1.2878981726729267e-06, "loss": 0.5126, "step": 8907 }, { "epoch": 1.29, "grad_norm": 6.571667194366455, "learning_rate": 1.2877465880087913e-06, "loss": 0.5655, "step": 8908 }, { "epoch": 1.29, "grad_norm": 7.049774169921875, "learning_rate": 1.2875949961356324e-06, "loss": 0.6432, "step": 8909 }, { "epoch": 1.29, "grad_norm": 5.596050262451172, "learning_rate": 1.2874433970572483e-06, "loss": 0.4729, "step": 8910 }, { "epoch": 1.29, "grad_norm": 6.605506420135498, "learning_rate": 1.2872917907774365e-06, "loss": 0.5564, "step": 8911 }, { "epoch": 1.29, "grad_norm": 6.442858695983887, "learning_rate": 1.2871401772999959e-06, "loss": 0.5917, "step": 8912 }, { "epoch": 1.29, "grad_norm": 6.2837138175964355, "learning_rate": 1.286988556628725e-06, "loss": 0.5477, "step": 8913 }, { "epoch": 1.29, "grad_norm": 6.563881874084473, "learning_rate": 1.2868369287674216e-06, "loss": 0.4716, "step": 8914 }, { "epoch": 1.29, "grad_norm": 6.463689804077148, "learning_rate": 1.2866852937198855e-06, "loss": 0.5454, "step": 8915 }, { "epoch": 1.29, "grad_norm": 5.669532775878906, "learning_rate": 1.286533651489915e-06, "loss": 0.5122, "step": 8916 }, { "epoch": 1.29, "grad_norm": 6.216212272644043, "learning_rate": 1.2863820020813095e-06, "loss": 0.502, "step": 8917 }, { "epoch": 1.29, "grad_norm": 7.655427932739258, "learning_rate": 1.286230345497868e-06, "loss": 0.5969, "step": 8918 }, { "epoch": 1.29, "grad_norm": 6.599354267120361, "learning_rate": 1.2860786817433903e-06, "loss": 0.6031, "step": 8919 }, { "epoch": 1.29, "grad_norm": 5.849055767059326, "learning_rate": 1.2859270108216764e-06, "loss": 0.4585, "step": 8920 }, { "epoch": 1.29, "grad_norm": 5.824244976043701, "learning_rate": 1.2857753327365258e-06, "loss": 0.4827, "step": 8921 }, { "epoch": 1.29, "grad_norm": 6.680963516235352, "learning_rate": 1.2856236474917383e-06, "loss": 0.5382, "step": 8922 }, { "epoch": 1.29, "grad_norm": 6.6542205810546875, "learning_rate": 1.2854719550911143e-06, "loss": 0.5853, "step": 8923 }, { "epoch": 1.29, "grad_norm": 6.787193775177002, "learning_rate": 1.2853202555384547e-06, "loss": 0.5651, "step": 8924 }, { "epoch": 1.29, "grad_norm": 7.451748371124268, "learning_rate": 1.2851685488375593e-06, "loss": 0.5649, "step": 8925 }, { "epoch": 1.3, "grad_norm": 6.826278209686279, "learning_rate": 1.2850168349922297e-06, "loss": 0.6163, "step": 8926 }, { "epoch": 1.3, "grad_norm": 6.436089038848877, "learning_rate": 1.284865114006266e-06, "loss": 0.5734, "step": 8927 }, { "epoch": 1.3, "grad_norm": 6.883821487426758, "learning_rate": 1.28471338588347e-06, "loss": 0.4748, "step": 8928 }, { "epoch": 1.3, "grad_norm": 6.035908222198486, "learning_rate": 1.2845616506276427e-06, "loss": 0.5394, "step": 8929 }, { "epoch": 1.3, "grad_norm": 7.2698283195495605, "learning_rate": 1.2844099082425857e-06, "loss": 0.5867, "step": 8930 }, { "epoch": 1.3, "grad_norm": 5.978713035583496, "learning_rate": 1.2842581587321003e-06, "loss": 0.5262, "step": 8931 }, { "epoch": 1.3, "grad_norm": 6.391711235046387, "learning_rate": 1.2841064020999885e-06, "loss": 0.5414, "step": 8932 }, { "epoch": 1.3, "grad_norm": 6.239747524261475, "learning_rate": 1.2839546383500528e-06, "loss": 0.5021, "step": 8933 }, { "epoch": 1.3, "grad_norm": 5.809851169586182, "learning_rate": 1.2838028674860949e-06, "loss": 0.5233, "step": 8934 }, { "epoch": 1.3, "grad_norm": 6.793773651123047, "learning_rate": 1.283651089511917e-06, "loss": 0.5642, "step": 8935 }, { "epoch": 1.3, "grad_norm": 5.832453727722168, "learning_rate": 1.2834993044313222e-06, "loss": 0.5174, "step": 8936 }, { "epoch": 1.3, "grad_norm": 6.423985004425049, "learning_rate": 1.2833475122481127e-06, "loss": 0.5248, "step": 8937 }, { "epoch": 1.3, "grad_norm": 7.103752613067627, "learning_rate": 1.283195712966092e-06, "loss": 0.5683, "step": 8938 }, { "epoch": 1.3, "grad_norm": 5.949147701263428, "learning_rate": 1.2830439065890628e-06, "loss": 0.5299, "step": 8939 }, { "epoch": 1.3, "grad_norm": 6.34306526184082, "learning_rate": 1.2828920931208282e-06, "loss": 0.578, "step": 8940 }, { "epoch": 1.3, "grad_norm": 6.144754409790039, "learning_rate": 1.282740272565192e-06, "loss": 0.5041, "step": 8941 }, { "epoch": 1.3, "grad_norm": 6.197955131530762, "learning_rate": 1.2825884449259578e-06, "loss": 0.5324, "step": 8942 }, { "epoch": 1.3, "grad_norm": 6.599277973175049, "learning_rate": 1.2824366102069293e-06, "loss": 0.6066, "step": 8943 }, { "epoch": 1.3, "grad_norm": 5.923926830291748, "learning_rate": 1.2822847684119102e-06, "loss": 0.5495, "step": 8944 }, { "epoch": 1.3, "grad_norm": 6.850966930389404, "learning_rate": 1.2821329195447051e-06, "loss": 0.603, "step": 8945 }, { "epoch": 1.3, "grad_norm": 6.194354057312012, "learning_rate": 1.281981063609118e-06, "loss": 0.4978, "step": 8946 }, { "epoch": 1.3, "grad_norm": 6.717857360839844, "learning_rate": 1.2818292006089536e-06, "loss": 0.5095, "step": 8947 }, { "epoch": 1.3, "grad_norm": 6.182528018951416, "learning_rate": 1.2816773305480164e-06, "loss": 0.5496, "step": 8948 }, { "epoch": 1.3, "grad_norm": 7.021213054656982, "learning_rate": 1.2815254534301117e-06, "loss": 0.5172, "step": 8949 }, { "epoch": 1.3, "grad_norm": 6.310569763183594, "learning_rate": 1.2813735692590437e-06, "loss": 0.516, "step": 8950 }, { "epoch": 1.3, "grad_norm": 6.625014781951904, "learning_rate": 1.2812216780386185e-06, "loss": 0.5111, "step": 8951 }, { "epoch": 1.3, "grad_norm": 6.471928119659424, "learning_rate": 1.2810697797726407e-06, "loss": 0.5182, "step": 8952 }, { "epoch": 1.3, "grad_norm": 6.518959999084473, "learning_rate": 1.2809178744649164e-06, "loss": 0.5484, "step": 8953 }, { "epoch": 1.3, "grad_norm": 6.0684356689453125, "learning_rate": 1.2807659621192513e-06, "loss": 0.4793, "step": 8954 }, { "epoch": 1.3, "grad_norm": 6.305763244628906, "learning_rate": 1.2806140427394513e-06, "loss": 0.5799, "step": 8955 }, { "epoch": 1.3, "grad_norm": 5.802277088165283, "learning_rate": 1.2804621163293222e-06, "loss": 0.5212, "step": 8956 }, { "epoch": 1.3, "grad_norm": 6.6869001388549805, "learning_rate": 1.2803101828926708e-06, "loss": 0.5811, "step": 8957 }, { "epoch": 1.3, "grad_norm": 6.453786373138428, "learning_rate": 1.2801582424333029e-06, "loss": 0.5971, "step": 8958 }, { "epoch": 1.3, "grad_norm": 6.515607833862305, "learning_rate": 1.2800062949550254e-06, "loss": 0.5579, "step": 8959 }, { "epoch": 1.3, "grad_norm": 7.235925197601318, "learning_rate": 1.2798543404616452e-06, "loss": 0.6587, "step": 8960 }, { "epoch": 1.3, "grad_norm": 6.0298075675964355, "learning_rate": 1.2797023789569695e-06, "loss": 0.5092, "step": 8961 }, { "epoch": 1.3, "grad_norm": 6.023988246917725, "learning_rate": 1.2795504104448048e-06, "loss": 0.5088, "step": 8962 }, { "epoch": 1.3, "grad_norm": 6.097609996795654, "learning_rate": 1.279398434928959e-06, "loss": 0.6002, "step": 8963 }, { "epoch": 1.3, "grad_norm": 6.068426132202148, "learning_rate": 1.2792464524132393e-06, "loss": 0.5869, "step": 8964 }, { "epoch": 1.3, "grad_norm": 6.141747951507568, "learning_rate": 1.2790944629014535e-06, "loss": 0.5189, "step": 8965 }, { "epoch": 1.3, "grad_norm": 6.063326358795166, "learning_rate": 1.2789424663974093e-06, "loss": 0.517, "step": 8966 }, { "epoch": 1.3, "grad_norm": 6.129585266113281, "learning_rate": 1.278790462904915e-06, "loss": 0.5638, "step": 8967 }, { "epoch": 1.3, "grad_norm": 6.342556953430176, "learning_rate": 1.2786384524277786e-06, "loss": 0.5398, "step": 8968 }, { "epoch": 1.3, "grad_norm": 6.538375377655029, "learning_rate": 1.2784864349698083e-06, "loss": 0.503, "step": 8969 }, { "epoch": 1.3, "grad_norm": 6.5150885581970215, "learning_rate": 1.2783344105348132e-06, "loss": 0.5323, "step": 8970 }, { "epoch": 1.3, "grad_norm": 6.028109550476074, "learning_rate": 1.2781823791266018e-06, "loss": 0.5402, "step": 8971 }, { "epoch": 1.3, "grad_norm": 5.767075061798096, "learning_rate": 1.2780303407489826e-06, "loss": 0.5492, "step": 8972 }, { "epoch": 1.3, "grad_norm": 6.102355003356934, "learning_rate": 1.277878295405765e-06, "loss": 0.5818, "step": 8973 }, { "epoch": 1.3, "grad_norm": 7.057443141937256, "learning_rate": 1.277726243100758e-06, "loss": 0.61, "step": 8974 }, { "epoch": 1.3, "grad_norm": 6.305145740509033, "learning_rate": 1.2775741838377716e-06, "loss": 0.5298, "step": 8975 }, { "epoch": 1.3, "grad_norm": 6.848773002624512, "learning_rate": 1.2774221176206148e-06, "loss": 0.5234, "step": 8976 }, { "epoch": 1.3, "grad_norm": 5.78895902633667, "learning_rate": 1.277270044453098e-06, "loss": 0.556, "step": 8977 }, { "epoch": 1.3, "grad_norm": 5.551935195922852, "learning_rate": 1.2771179643390304e-06, "loss": 0.5481, "step": 8978 }, { "epoch": 1.3, "grad_norm": 6.691678524017334, "learning_rate": 1.2769658772822225e-06, "loss": 0.6315, "step": 8979 }, { "epoch": 1.3, "grad_norm": 6.793554782867432, "learning_rate": 1.2768137832864845e-06, "loss": 0.6391, "step": 8980 }, { "epoch": 1.3, "grad_norm": 6.185685157775879, "learning_rate": 1.2766616823556271e-06, "loss": 0.5816, "step": 8981 }, { "epoch": 1.3, "grad_norm": 6.257561206817627, "learning_rate": 1.2765095744934606e-06, "loss": 0.533, "step": 8982 }, { "epoch": 1.3, "grad_norm": 5.996594429016113, "learning_rate": 1.276357459703796e-06, "loss": 0.5385, "step": 8983 }, { "epoch": 1.3, "grad_norm": 5.822338581085205, "learning_rate": 1.2762053379904447e-06, "loss": 0.5697, "step": 8984 }, { "epoch": 1.3, "grad_norm": 6.244021415710449, "learning_rate": 1.276053209357217e-06, "loss": 0.5446, "step": 8985 }, { "epoch": 1.3, "grad_norm": 6.002920150756836, "learning_rate": 1.2759010738079247e-06, "loss": 0.5155, "step": 8986 }, { "epoch": 1.3, "grad_norm": 5.85655403137207, "learning_rate": 1.2757489313463796e-06, "loss": 0.5158, "step": 8987 }, { "epoch": 1.3, "grad_norm": 6.258211612701416, "learning_rate": 1.2755967819763926e-06, "loss": 0.4812, "step": 8988 }, { "epoch": 1.3, "grad_norm": 6.198417663574219, "learning_rate": 1.2754446257017764e-06, "loss": 0.5231, "step": 8989 }, { "epoch": 1.3, "grad_norm": 6.496093273162842, "learning_rate": 1.2752924625263423e-06, "loss": 0.5878, "step": 8990 }, { "epoch": 1.3, "grad_norm": 6.535361289978027, "learning_rate": 1.2751402924539033e-06, "loss": 0.5852, "step": 8991 }, { "epoch": 1.3, "grad_norm": 7.156918048858643, "learning_rate": 1.2749881154882711e-06, "loss": 0.619, "step": 8992 }, { "epoch": 1.3, "grad_norm": 6.705036163330078, "learning_rate": 1.2748359316332584e-06, "loss": 0.5504, "step": 8993 }, { "epoch": 1.3, "grad_norm": 5.5051374435424805, "learning_rate": 1.274683740892678e-06, "loss": 0.4964, "step": 8994 }, { "epoch": 1.31, "grad_norm": 7.646913528442383, "learning_rate": 1.274531543270343e-06, "loss": 0.5358, "step": 8995 }, { "epoch": 1.31, "grad_norm": 6.346851348876953, "learning_rate": 1.274379338770066e-06, "loss": 0.5642, "step": 8996 }, { "epoch": 1.31, "grad_norm": 6.187546730041504, "learning_rate": 1.2742271273956608e-06, "loss": 0.5491, "step": 8997 }, { "epoch": 1.31, "grad_norm": 6.770738124847412, "learning_rate": 1.2740749091509404e-06, "loss": 0.5395, "step": 8998 }, { "epoch": 1.31, "grad_norm": 6.652735710144043, "learning_rate": 1.2739226840397183e-06, "loss": 0.5833, "step": 8999 }, { "epoch": 1.31, "grad_norm": 6.633842468261719, "learning_rate": 1.2737704520658085e-06, "loss": 0.5724, "step": 9000 }, { "epoch": 1.31, "grad_norm": 6.723155498504639, "learning_rate": 1.273618213233025e-06, "loss": 0.4908, "step": 9001 }, { "epoch": 1.31, "grad_norm": 7.38453483581543, "learning_rate": 1.2734659675451816e-06, "loss": 0.5772, "step": 9002 }, { "epoch": 1.31, "grad_norm": 6.577548503875732, "learning_rate": 1.2733137150060923e-06, "loss": 0.593, "step": 9003 }, { "epoch": 1.31, "grad_norm": 6.822513103485107, "learning_rate": 1.2731614556195726e-06, "loss": 0.5063, "step": 9004 }, { "epoch": 1.31, "grad_norm": 7.104963302612305, "learning_rate": 1.2730091893894366e-06, "loss": 0.5449, "step": 9005 }, { "epoch": 1.31, "grad_norm": 6.079375267028809, "learning_rate": 1.2728569163194984e-06, "loss": 0.5823, "step": 9006 }, { "epoch": 1.31, "grad_norm": 5.984191417694092, "learning_rate": 1.2727046364135738e-06, "loss": 0.4996, "step": 9007 }, { "epoch": 1.31, "grad_norm": 7.098235607147217, "learning_rate": 1.2725523496754776e-06, "loss": 0.5241, "step": 9008 }, { "epoch": 1.31, "grad_norm": 6.159056186676025, "learning_rate": 1.2724000561090249e-06, "loss": 0.6007, "step": 9009 }, { "epoch": 1.31, "grad_norm": 6.854687690734863, "learning_rate": 1.2722477557180315e-06, "loss": 0.559, "step": 9010 }, { "epoch": 1.31, "grad_norm": 7.183736801147461, "learning_rate": 1.2720954485063132e-06, "loss": 0.528, "step": 9011 }, { "epoch": 1.31, "grad_norm": 5.934925556182861, "learning_rate": 1.2719431344776853e-06, "loss": 0.4775, "step": 9012 }, { "epoch": 1.31, "grad_norm": 6.856775283813477, "learning_rate": 1.2717908136359643e-06, "loss": 0.5665, "step": 9013 }, { "epoch": 1.31, "grad_norm": 5.988681793212891, "learning_rate": 1.2716384859849659e-06, "loss": 0.614, "step": 9014 }, { "epoch": 1.31, "grad_norm": 7.035165309906006, "learning_rate": 1.2714861515285065e-06, "loss": 0.5499, "step": 9015 }, { "epoch": 1.31, "grad_norm": 5.780824184417725, "learning_rate": 1.2713338102704026e-06, "loss": 0.435, "step": 9016 }, { "epoch": 1.31, "grad_norm": 6.807577610015869, "learning_rate": 1.271181462214471e-06, "loss": 0.5469, "step": 9017 }, { "epoch": 1.31, "grad_norm": 7.719323635101318, "learning_rate": 1.2710291073645287e-06, "loss": 0.6089, "step": 9018 }, { "epoch": 1.31, "grad_norm": 6.480006694793701, "learning_rate": 1.2708767457243925e-06, "loss": 0.5588, "step": 9019 }, { "epoch": 1.31, "grad_norm": 6.542502403259277, "learning_rate": 1.2707243772978794e-06, "loss": 0.545, "step": 9020 }, { "epoch": 1.31, "grad_norm": 6.15376091003418, "learning_rate": 1.2705720020888068e-06, "loss": 0.4927, "step": 9021 }, { "epoch": 1.31, "grad_norm": 6.118302822113037, "learning_rate": 1.2704196201009928e-06, "loss": 0.5521, "step": 9022 }, { "epoch": 1.31, "grad_norm": 5.734287261962891, "learning_rate": 1.270267231338254e-06, "loss": 0.5602, "step": 9023 }, { "epoch": 1.31, "grad_norm": 6.913835048675537, "learning_rate": 1.2701148358044092e-06, "loss": 0.6378, "step": 9024 }, { "epoch": 1.31, "grad_norm": 7.303103446960449, "learning_rate": 1.2699624335032762e-06, "loss": 0.5698, "step": 9025 }, { "epoch": 1.31, "grad_norm": 5.897487163543701, "learning_rate": 1.269810024438673e-06, "loss": 0.5203, "step": 9026 }, { "epoch": 1.31, "grad_norm": 6.56456995010376, "learning_rate": 1.2696576086144179e-06, "loss": 0.6181, "step": 9027 }, { "epoch": 1.31, "grad_norm": 6.647875785827637, "learning_rate": 1.2695051860343294e-06, "loss": 0.5956, "step": 9028 }, { "epoch": 1.31, "grad_norm": 7.030520915985107, "learning_rate": 1.2693527567022265e-06, "loss": 0.5125, "step": 9029 }, { "epoch": 1.31, "grad_norm": 7.195077419281006, "learning_rate": 1.2692003206219282e-06, "loss": 0.5908, "step": 9030 }, { "epoch": 1.31, "grad_norm": 6.630209445953369, "learning_rate": 1.2690478777972528e-06, "loss": 0.5474, "step": 9031 }, { "epoch": 1.31, "grad_norm": 6.76900053024292, "learning_rate": 1.2688954282320203e-06, "loss": 0.5299, "step": 9032 }, { "epoch": 1.31, "grad_norm": 6.5671281814575195, "learning_rate": 1.26874297193005e-06, "loss": 0.5696, "step": 9033 }, { "epoch": 1.31, "grad_norm": 5.8119401931762695, "learning_rate": 1.2685905088951608e-06, "loss": 0.5739, "step": 9034 }, { "epoch": 1.31, "grad_norm": 6.12885046005249, "learning_rate": 1.2684380391311728e-06, "loss": 0.6083, "step": 9035 }, { "epoch": 1.31, "grad_norm": 6.637474060058594, "learning_rate": 1.268285562641906e-06, "loss": 0.4828, "step": 9036 }, { "epoch": 1.31, "grad_norm": 6.540514945983887, "learning_rate": 1.2681330794311804e-06, "loss": 0.6197, "step": 9037 }, { "epoch": 1.31, "grad_norm": 6.28822660446167, "learning_rate": 1.267980589502816e-06, "loss": 0.5747, "step": 9038 }, { "epoch": 1.31, "grad_norm": 7.129144668579102, "learning_rate": 1.2678280928606335e-06, "loss": 0.5337, "step": 9039 }, { "epoch": 1.31, "grad_norm": 6.852786540985107, "learning_rate": 1.2676755895084536e-06, "loss": 0.6463, "step": 9040 }, { "epoch": 1.31, "grad_norm": 6.029952526092529, "learning_rate": 1.2675230794500965e-06, "loss": 0.5028, "step": 9041 }, { "epoch": 1.31, "grad_norm": 6.497941493988037, "learning_rate": 1.2673705626893828e-06, "loss": 0.5408, "step": 9042 }, { "epoch": 1.31, "grad_norm": 6.725857257843018, "learning_rate": 1.2672180392301346e-06, "loss": 0.5312, "step": 9043 }, { "epoch": 1.31, "grad_norm": 6.270909786224365, "learning_rate": 1.2670655090761724e-06, "loss": 0.5532, "step": 9044 }, { "epoch": 1.31, "grad_norm": 6.714020252227783, "learning_rate": 1.266912972231318e-06, "loss": 0.5642, "step": 9045 }, { "epoch": 1.31, "grad_norm": 6.024475574493408, "learning_rate": 1.2667604286993929e-06, "loss": 0.5149, "step": 9046 }, { "epoch": 1.31, "grad_norm": 6.754128456115723, "learning_rate": 1.2666078784842185e-06, "loss": 0.5135, "step": 9047 }, { "epoch": 1.31, "grad_norm": 6.141905307769775, "learning_rate": 1.2664553215896168e-06, "loss": 0.492, "step": 9048 }, { "epoch": 1.31, "grad_norm": 5.774586200714111, "learning_rate": 1.26630275801941e-06, "loss": 0.5226, "step": 9049 }, { "epoch": 1.31, "grad_norm": 6.531770706176758, "learning_rate": 1.2661501877774203e-06, "loss": 0.5134, "step": 9050 }, { "epoch": 1.31, "grad_norm": 6.0958428382873535, "learning_rate": 1.2659976108674704e-06, "loss": 0.5292, "step": 9051 }, { "epoch": 1.31, "grad_norm": 6.588393688201904, "learning_rate": 1.2658450272933821e-06, "loss": 0.5808, "step": 9052 }, { "epoch": 1.31, "grad_norm": 6.590281009674072, "learning_rate": 1.265692437058979e-06, "loss": 0.6055, "step": 9053 }, { "epoch": 1.31, "grad_norm": 5.725449085235596, "learning_rate": 1.2655398401680834e-06, "loss": 0.491, "step": 9054 }, { "epoch": 1.31, "grad_norm": 6.438556671142578, "learning_rate": 1.2653872366245187e-06, "loss": 0.4946, "step": 9055 }, { "epoch": 1.31, "grad_norm": 6.901920318603516, "learning_rate": 1.2652346264321077e-06, "loss": 0.5912, "step": 9056 }, { "epoch": 1.31, "grad_norm": 6.49364709854126, "learning_rate": 1.2650820095946741e-06, "loss": 0.5517, "step": 9057 }, { "epoch": 1.31, "grad_norm": 6.8839616775512695, "learning_rate": 1.2649293861160416e-06, "loss": 0.545, "step": 9058 }, { "epoch": 1.31, "grad_norm": 6.892629146575928, "learning_rate": 1.264776756000034e-06, "loss": 0.5423, "step": 9059 }, { "epoch": 1.31, "grad_norm": 6.481902122497559, "learning_rate": 1.2646241192504747e-06, "loss": 0.5252, "step": 9060 }, { "epoch": 1.31, "grad_norm": 6.337584972381592, "learning_rate": 1.2644714758711883e-06, "loss": 0.5523, "step": 9061 }, { "epoch": 1.31, "grad_norm": 6.317839622497559, "learning_rate": 1.2643188258659988e-06, "loss": 0.566, "step": 9062 }, { "epoch": 1.32, "grad_norm": 6.162083625793457, "learning_rate": 1.2641661692387304e-06, "loss": 0.5626, "step": 9063 }, { "epoch": 1.32, "grad_norm": 5.554599761962891, "learning_rate": 1.2640135059932078e-06, "loss": 0.4754, "step": 9064 }, { "epoch": 1.32, "grad_norm": 6.234583377838135, "learning_rate": 1.263860836133256e-06, "loss": 0.5943, "step": 9065 }, { "epoch": 1.32, "grad_norm": 6.725155830383301, "learning_rate": 1.2637081596626992e-06, "loss": 0.5597, "step": 9066 }, { "epoch": 1.32, "grad_norm": 5.971261978149414, "learning_rate": 1.2635554765853636e-06, "loss": 0.5425, "step": 9067 }, { "epoch": 1.32, "grad_norm": 5.6034979820251465, "learning_rate": 1.2634027869050733e-06, "loss": 0.5439, "step": 9068 }, { "epoch": 1.32, "grad_norm": 5.705776691436768, "learning_rate": 1.2632500906256542e-06, "loss": 0.4935, "step": 9069 }, { "epoch": 1.32, "grad_norm": 6.47165060043335, "learning_rate": 1.2630973877509322e-06, "loss": 0.5996, "step": 9070 }, { "epoch": 1.32, "grad_norm": 7.481491565704346, "learning_rate": 1.2629446782847321e-06, "loss": 0.5955, "step": 9071 }, { "epoch": 1.32, "grad_norm": 5.853129863739014, "learning_rate": 1.2627919622308807e-06, "loss": 0.4934, "step": 9072 }, { "epoch": 1.32, "grad_norm": 6.539303779602051, "learning_rate": 1.2626392395932036e-06, "loss": 0.5978, "step": 9073 }, { "epoch": 1.32, "grad_norm": 6.773990154266357, "learning_rate": 1.2624865103755274e-06, "loss": 0.5101, "step": 9074 }, { "epoch": 1.32, "grad_norm": 6.519693374633789, "learning_rate": 1.2623337745816777e-06, "loss": 0.5641, "step": 9075 }, { "epoch": 1.32, "grad_norm": 6.255433559417725, "learning_rate": 1.2621810322154818e-06, "loss": 0.5014, "step": 9076 }, { "epoch": 1.32, "grad_norm": 5.9731903076171875, "learning_rate": 1.2620282832807657e-06, "loss": 0.5178, "step": 9077 }, { "epoch": 1.32, "grad_norm": 8.384765625, "learning_rate": 1.2618755277813574e-06, "loss": 0.5197, "step": 9078 }, { "epoch": 1.32, "grad_norm": 5.907266139984131, "learning_rate": 1.2617227657210828e-06, "loss": 0.4657, "step": 9079 }, { "epoch": 1.32, "grad_norm": 6.628192901611328, "learning_rate": 1.2615699971037694e-06, "loss": 0.5166, "step": 9080 }, { "epoch": 1.32, "grad_norm": 7.038252830505371, "learning_rate": 1.2614172219332452e-06, "loss": 0.6422, "step": 9081 }, { "epoch": 1.32, "grad_norm": 7.247025966644287, "learning_rate": 1.2612644402133372e-06, "loss": 0.5096, "step": 9082 }, { "epoch": 1.32, "grad_norm": 6.131010055541992, "learning_rate": 1.261111651947873e-06, "loss": 0.5395, "step": 9083 }, { "epoch": 1.32, "grad_norm": 6.731506824493408, "learning_rate": 1.2609588571406805e-06, "loss": 0.4979, "step": 9084 }, { "epoch": 1.32, "grad_norm": 7.098759174346924, "learning_rate": 1.2608060557955879e-06, "loss": 0.5443, "step": 9085 }, { "epoch": 1.32, "grad_norm": 6.004537105560303, "learning_rate": 1.2606532479164236e-06, "loss": 0.5227, "step": 9086 }, { "epoch": 1.32, "grad_norm": 6.166445255279541, "learning_rate": 1.2605004335070155e-06, "loss": 0.5436, "step": 9087 }, { "epoch": 1.32, "grad_norm": 7.081042766571045, "learning_rate": 1.2603476125711922e-06, "loss": 0.5715, "step": 9088 }, { "epoch": 1.32, "grad_norm": 6.24297571182251, "learning_rate": 1.2601947851127827e-06, "loss": 0.5125, "step": 9089 }, { "epoch": 1.32, "grad_norm": 5.862003803253174, "learning_rate": 1.2600419511356155e-06, "loss": 0.5166, "step": 9090 }, { "epoch": 1.32, "grad_norm": 5.6973490715026855, "learning_rate": 1.2598891106435198e-06, "loss": 0.5401, "step": 9091 }, { "epoch": 1.32, "grad_norm": 6.744311809539795, "learning_rate": 1.2597362636403249e-06, "loss": 0.5372, "step": 9092 }, { "epoch": 1.32, "grad_norm": 7.27288818359375, "learning_rate": 1.2595834101298594e-06, "loss": 0.5547, "step": 9093 }, { "epoch": 1.32, "grad_norm": 7.261601448059082, "learning_rate": 1.2594305501159538e-06, "loss": 0.5552, "step": 9094 }, { "epoch": 1.32, "grad_norm": 6.423548698425293, "learning_rate": 1.259277683602437e-06, "loss": 0.4891, "step": 9095 }, { "epoch": 1.32, "grad_norm": 8.293384552001953, "learning_rate": 1.2591248105931394e-06, "loss": 0.6027, "step": 9096 }, { "epoch": 1.32, "grad_norm": 6.226400852203369, "learning_rate": 1.2589719310918904e-06, "loss": 0.5519, "step": 9097 }, { "epoch": 1.32, "grad_norm": 6.993908405303955, "learning_rate": 1.2588190451025207e-06, "loss": 0.5866, "step": 9098 }, { "epoch": 1.32, "grad_norm": 6.436413288116455, "learning_rate": 1.2586661526288603e-06, "loss": 0.5625, "step": 9099 }, { "epoch": 1.32, "grad_norm": 6.859861373901367, "learning_rate": 1.2585132536747397e-06, "loss": 0.5679, "step": 9100 }, { "epoch": 1.32, "grad_norm": 6.765675067901611, "learning_rate": 1.2583603482439896e-06, "loss": 0.5017, "step": 9101 }, { "epoch": 1.32, "grad_norm": 6.307423114776611, "learning_rate": 1.258207436340441e-06, "loss": 0.5913, "step": 9102 }, { "epoch": 1.32, "grad_norm": 5.867937088012695, "learning_rate": 1.2580545179679242e-06, "loss": 0.4791, "step": 9103 }, { "epoch": 1.32, "grad_norm": 6.450512886047363, "learning_rate": 1.257901593130271e-06, "loss": 0.5403, "step": 9104 }, { "epoch": 1.32, "grad_norm": 6.9257283210754395, "learning_rate": 1.2577486618313123e-06, "loss": 0.5541, "step": 9105 }, { "epoch": 1.32, "grad_norm": 6.165036678314209, "learning_rate": 1.2575957240748798e-06, "loss": 0.488, "step": 9106 }, { "epoch": 1.32, "grad_norm": 6.4961628913879395, "learning_rate": 1.2574427798648048e-06, "loss": 0.5518, "step": 9107 }, { "epoch": 1.32, "grad_norm": 6.109574794769287, "learning_rate": 1.2572898292049194e-06, "loss": 0.5505, "step": 9108 }, { "epoch": 1.32, "grad_norm": 7.257793426513672, "learning_rate": 1.2571368720990556e-06, "loss": 0.5976, "step": 9109 }, { "epoch": 1.32, "grad_norm": 6.820154666900635, "learning_rate": 1.256983908551045e-06, "loss": 0.5336, "step": 9110 }, { "epoch": 1.32, "grad_norm": 6.9748945236206055, "learning_rate": 1.25683093856472e-06, "loss": 0.5153, "step": 9111 }, { "epoch": 1.32, "grad_norm": 6.101904392242432, "learning_rate": 1.2566779621439135e-06, "loss": 0.5594, "step": 9112 }, { "epoch": 1.32, "grad_norm": 6.621454238891602, "learning_rate": 1.2565249792924572e-06, "loss": 0.5275, "step": 9113 }, { "epoch": 1.32, "grad_norm": 6.4559736251831055, "learning_rate": 1.2563719900141846e-06, "loss": 0.5344, "step": 9114 }, { "epoch": 1.32, "grad_norm": 7.858244895935059, "learning_rate": 1.2562189943129285e-06, "loss": 0.591, "step": 9115 }, { "epoch": 1.32, "grad_norm": 6.412772178649902, "learning_rate": 1.256065992192522e-06, "loss": 0.6192, "step": 9116 }, { "epoch": 1.32, "grad_norm": 6.553779602050781, "learning_rate": 1.255912983656798e-06, "loss": 0.5219, "step": 9117 }, { "epoch": 1.32, "grad_norm": 6.490416526794434, "learning_rate": 1.2557599687095897e-06, "loss": 0.6148, "step": 9118 }, { "epoch": 1.32, "grad_norm": 5.874951362609863, "learning_rate": 1.255606947354731e-06, "loss": 0.4621, "step": 9119 }, { "epoch": 1.32, "grad_norm": 6.01533317565918, "learning_rate": 1.2554539195960553e-06, "loss": 0.5598, "step": 9120 }, { "epoch": 1.32, "grad_norm": 6.719123363494873, "learning_rate": 1.2553008854373972e-06, "loss": 0.5539, "step": 9121 }, { "epoch": 1.32, "grad_norm": 6.501761436462402, "learning_rate": 1.2551478448825903e-06, "loss": 0.4913, "step": 9122 }, { "epoch": 1.32, "grad_norm": 6.563639163970947, "learning_rate": 1.2549947979354684e-06, "loss": 0.5581, "step": 9123 }, { "epoch": 1.32, "grad_norm": 6.238431453704834, "learning_rate": 1.2548417445998665e-06, "loss": 0.5463, "step": 9124 }, { "epoch": 1.32, "grad_norm": 6.314613342285156, "learning_rate": 1.2546886848796186e-06, "loss": 0.5272, "step": 9125 }, { "epoch": 1.32, "grad_norm": 6.682209014892578, "learning_rate": 1.2545356187785593e-06, "loss": 0.6584, "step": 9126 }, { "epoch": 1.32, "grad_norm": 6.617912292480469, "learning_rate": 1.2543825463005237e-06, "loss": 0.6666, "step": 9127 }, { "epoch": 1.32, "grad_norm": 6.453165531158447, "learning_rate": 1.254229467449347e-06, "loss": 0.5549, "step": 9128 }, { "epoch": 1.32, "grad_norm": 6.939537525177002, "learning_rate": 1.2540763822288637e-06, "loss": 0.613, "step": 9129 }, { "epoch": 1.32, "grad_norm": 7.1401686668396, "learning_rate": 1.25392329064291e-06, "loss": 0.5608, "step": 9130 }, { "epoch": 1.32, "grad_norm": 6.725203037261963, "learning_rate": 1.2537701926953205e-06, "loss": 0.5871, "step": 9131 }, { "epoch": 1.33, "grad_norm": 6.013189315795898, "learning_rate": 1.2536170883899313e-06, "loss": 0.5102, "step": 9132 }, { "epoch": 1.33, "grad_norm": 5.863851070404053, "learning_rate": 1.2534639777305778e-06, "loss": 0.5692, "step": 9133 }, { "epoch": 1.33, "grad_norm": 7.079062461853027, "learning_rate": 1.2533108607210964e-06, "loss": 0.6134, "step": 9134 }, { "epoch": 1.33, "grad_norm": 6.76964807510376, "learning_rate": 1.2531577373653228e-06, "loss": 0.5023, "step": 9135 }, { "epoch": 1.33, "grad_norm": 6.717423915863037, "learning_rate": 1.2530046076670935e-06, "loss": 0.6086, "step": 9136 }, { "epoch": 1.33, "grad_norm": 6.772152423858643, "learning_rate": 1.252851471630245e-06, "loss": 0.5411, "step": 9137 }, { "epoch": 1.33, "grad_norm": 6.078986644744873, "learning_rate": 1.2526983292586134e-06, "loss": 0.5027, "step": 9138 }, { "epoch": 1.33, "grad_norm": 6.014969825744629, "learning_rate": 1.2525451805560361e-06, "loss": 0.5272, "step": 9139 }, { "epoch": 1.33, "grad_norm": 7.104048728942871, "learning_rate": 1.2523920255263493e-06, "loss": 0.5393, "step": 9140 }, { "epoch": 1.33, "grad_norm": 7.026402473449707, "learning_rate": 1.2522388641733908e-06, "loss": 0.6118, "step": 9141 }, { "epoch": 1.33, "grad_norm": 6.710355758666992, "learning_rate": 1.252085696500997e-06, "loss": 0.5719, "step": 9142 }, { "epoch": 1.33, "grad_norm": 6.354707717895508, "learning_rate": 1.2519325225130058e-06, "loss": 0.5127, "step": 9143 }, { "epoch": 1.33, "grad_norm": 5.908997535705566, "learning_rate": 1.2517793422132548e-06, "loss": 0.5561, "step": 9144 }, { "epoch": 1.33, "grad_norm": 5.9652791023254395, "learning_rate": 1.2516261556055815e-06, "loss": 0.5691, "step": 9145 }, { "epoch": 1.33, "grad_norm": 6.0102667808532715, "learning_rate": 1.2514729626938233e-06, "loss": 0.4938, "step": 9146 }, { "epoch": 1.33, "grad_norm": 6.75894832611084, "learning_rate": 1.2513197634818186e-06, "loss": 0.5394, "step": 9147 }, { "epoch": 1.33, "grad_norm": 7.098729133605957, "learning_rate": 1.251166557973406e-06, "loss": 0.516, "step": 9148 }, { "epoch": 1.33, "grad_norm": 5.623929500579834, "learning_rate": 1.251013346172423e-06, "loss": 0.534, "step": 9149 }, { "epoch": 1.33, "grad_norm": 7.083382606506348, "learning_rate": 1.250860128082709e-06, "loss": 0.609, "step": 9150 }, { "epoch": 1.33, "grad_norm": 6.170591354370117, "learning_rate": 1.2507069037081016e-06, "loss": 0.5631, "step": 9151 }, { "epoch": 1.33, "grad_norm": 6.39200496673584, "learning_rate": 1.2505536730524404e-06, "loss": 0.5559, "step": 9152 }, { "epoch": 1.33, "grad_norm": 6.283904552459717, "learning_rate": 1.250400436119564e-06, "loss": 0.4958, "step": 9153 }, { "epoch": 1.33, "grad_norm": 5.80116605758667, "learning_rate": 1.2502471929133112e-06, "loss": 0.5289, "step": 9154 }, { "epoch": 1.33, "grad_norm": 6.707247257232666, "learning_rate": 1.2500939434375218e-06, "loss": 0.6096, "step": 9155 }, { "epoch": 1.33, "grad_norm": 7.541426181793213, "learning_rate": 1.249940687696035e-06, "loss": 0.6073, "step": 9156 }, { "epoch": 1.33, "grad_norm": 6.445777893066406, "learning_rate": 1.2497874256926905e-06, "loss": 0.4739, "step": 9157 }, { "epoch": 1.33, "grad_norm": 6.167665958404541, "learning_rate": 1.2496341574313278e-06, "loss": 0.5169, "step": 9158 }, { "epoch": 1.33, "grad_norm": 7.319546222686768, "learning_rate": 1.249480882915787e-06, "loss": 0.5656, "step": 9159 }, { "epoch": 1.33, "grad_norm": 5.671518325805664, "learning_rate": 1.249327602149908e-06, "loss": 0.5229, "step": 9160 }, { "epoch": 1.33, "grad_norm": 6.145435333251953, "learning_rate": 1.249174315137531e-06, "loss": 0.5305, "step": 9161 }, { "epoch": 1.33, "grad_norm": 6.1530842781066895, "learning_rate": 1.249021021882496e-06, "loss": 0.546, "step": 9162 }, { "epoch": 1.33, "grad_norm": 5.78956413269043, "learning_rate": 1.2488677223886446e-06, "loss": 0.5462, "step": 9163 }, { "epoch": 1.33, "grad_norm": 6.559232234954834, "learning_rate": 1.2487144166598164e-06, "loss": 0.5061, "step": 9164 }, { "epoch": 1.33, "grad_norm": 6.63547945022583, "learning_rate": 1.248561104699853e-06, "loss": 0.5224, "step": 9165 }, { "epoch": 1.33, "grad_norm": 7.343385219573975, "learning_rate": 1.2484077865125945e-06, "loss": 0.5839, "step": 9166 }, { "epoch": 1.33, "grad_norm": 6.167026042938232, "learning_rate": 1.2482544621018828e-06, "loss": 0.5756, "step": 9167 }, { "epoch": 1.33, "grad_norm": 6.481902599334717, "learning_rate": 1.2481011314715588e-06, "loss": 0.4821, "step": 9168 }, { "epoch": 1.33, "grad_norm": 7.287515163421631, "learning_rate": 1.2479477946254643e-06, "loss": 0.6397, "step": 9169 }, { "epoch": 1.33, "grad_norm": 7.1982645988464355, "learning_rate": 1.2477944515674403e-06, "loss": 0.5755, "step": 9170 }, { "epoch": 1.33, "grad_norm": 6.909660816192627, "learning_rate": 1.2476411023013292e-06, "loss": 0.5877, "step": 9171 }, { "epoch": 1.33, "grad_norm": 6.291736125946045, "learning_rate": 1.2474877468309723e-06, "loss": 0.5384, "step": 9172 }, { "epoch": 1.33, "grad_norm": 6.341387748718262, "learning_rate": 1.2473343851602125e-06, "loss": 0.4825, "step": 9173 }, { "epoch": 1.33, "grad_norm": 6.49940824508667, "learning_rate": 1.2471810172928915e-06, "loss": 0.596, "step": 9174 }, { "epoch": 1.33, "grad_norm": 6.462960243225098, "learning_rate": 1.2470276432328516e-06, "loss": 0.5519, "step": 9175 }, { "epoch": 1.33, "grad_norm": 7.962314128875732, "learning_rate": 1.2468742629839352e-06, "loss": 0.4885, "step": 9176 }, { "epoch": 1.33, "grad_norm": 6.828153610229492, "learning_rate": 1.2467208765499857e-06, "loss": 0.5562, "step": 9177 }, { "epoch": 1.33, "grad_norm": 6.599499702453613, "learning_rate": 1.2465674839348454e-06, "loss": 0.6004, "step": 9178 }, { "epoch": 1.33, "grad_norm": 5.860548496246338, "learning_rate": 1.2464140851423574e-06, "loss": 0.5552, "step": 9179 }, { "epoch": 1.33, "grad_norm": 6.45205020904541, "learning_rate": 1.2462606801763649e-06, "loss": 0.5306, "step": 9180 }, { "epoch": 1.33, "grad_norm": 6.0773210525512695, "learning_rate": 1.246107269040711e-06, "loss": 0.6025, "step": 9181 }, { "epoch": 1.33, "grad_norm": 5.563640117645264, "learning_rate": 1.2459538517392395e-06, "loss": 0.5088, "step": 9182 }, { "epoch": 1.33, "grad_norm": 6.3904571533203125, "learning_rate": 1.245800428275794e-06, "loss": 0.5001, "step": 9183 }, { "epoch": 1.33, "grad_norm": 6.223964214324951, "learning_rate": 1.245646998654218e-06, "loss": 0.5224, "step": 9184 }, { "epoch": 1.33, "grad_norm": 6.641674041748047, "learning_rate": 1.245493562878356e-06, "loss": 0.546, "step": 9185 }, { "epoch": 1.33, "grad_norm": 5.789113998413086, "learning_rate": 1.245340120952051e-06, "loss": 0.5218, "step": 9186 }, { "epoch": 1.33, "grad_norm": 6.448634624481201, "learning_rate": 1.2451866728791483e-06, "loss": 0.5106, "step": 9187 }, { "epoch": 1.33, "grad_norm": 6.374603748321533, "learning_rate": 1.2450332186634917e-06, "loss": 0.5859, "step": 9188 }, { "epoch": 1.33, "grad_norm": 6.783294677734375, "learning_rate": 1.244879758308926e-06, "loss": 0.5817, "step": 9189 }, { "epoch": 1.33, "grad_norm": 6.481525421142578, "learning_rate": 1.2447262918192961e-06, "loss": 0.5475, "step": 9190 }, { "epoch": 1.33, "grad_norm": 6.386589527130127, "learning_rate": 1.2445728191984464e-06, "loss": 0.5182, "step": 9191 }, { "epoch": 1.33, "grad_norm": 6.626706600189209, "learning_rate": 1.244419340450222e-06, "loss": 0.5764, "step": 9192 }, { "epoch": 1.33, "grad_norm": 7.431110858917236, "learning_rate": 1.2442658555784685e-06, "loss": 0.4951, "step": 9193 }, { "epoch": 1.33, "grad_norm": 6.574624538421631, "learning_rate": 1.2441123645870307e-06, "loss": 0.5746, "step": 9194 }, { "epoch": 1.33, "grad_norm": 7.196655750274658, "learning_rate": 1.243958867479754e-06, "loss": 0.6044, "step": 9195 }, { "epoch": 1.33, "grad_norm": 6.3954758644104, "learning_rate": 1.2438053642604845e-06, "loss": 0.4733, "step": 9196 }, { "epoch": 1.33, "grad_norm": 6.303436756134033, "learning_rate": 1.2436518549330677e-06, "loss": 0.5445, "step": 9197 }, { "epoch": 1.33, "grad_norm": 6.59197998046875, "learning_rate": 1.2434983395013493e-06, "loss": 0.578, "step": 9198 }, { "epoch": 1.33, "grad_norm": 6.79741907119751, "learning_rate": 1.243344817969176e-06, "loss": 0.4854, "step": 9199 }, { "epoch": 1.33, "grad_norm": 6.553240776062012, "learning_rate": 1.2431912903403935e-06, "loss": 0.5758, "step": 9200 }, { "epoch": 1.34, "grad_norm": 6.723324298858643, "learning_rate": 1.2430377566188486e-06, "loss": 0.5792, "step": 9201 }, { "epoch": 1.34, "grad_norm": 7.325719833374023, "learning_rate": 1.2428842168083872e-06, "loss": 0.5161, "step": 9202 }, { "epoch": 1.34, "grad_norm": 6.915548324584961, "learning_rate": 1.2427306709128566e-06, "loss": 0.6163, "step": 9203 }, { "epoch": 1.34, "grad_norm": 7.481595993041992, "learning_rate": 1.2425771189361035e-06, "loss": 0.5313, "step": 9204 }, { "epoch": 1.34, "grad_norm": 6.563412666320801, "learning_rate": 1.2424235608819748e-06, "loss": 0.5391, "step": 9205 }, { "epoch": 1.34, "grad_norm": 6.0504231452941895, "learning_rate": 1.242269996754318e-06, "loss": 0.5063, "step": 9206 }, { "epoch": 1.34, "grad_norm": 6.616901874542236, "learning_rate": 1.2421164265569797e-06, "loss": 0.5152, "step": 9207 }, { "epoch": 1.34, "grad_norm": 6.41615104675293, "learning_rate": 1.2419628502938078e-06, "loss": 0.4982, "step": 9208 }, { "epoch": 1.34, "grad_norm": 6.170596122741699, "learning_rate": 1.24180926796865e-06, "loss": 0.4979, "step": 9209 }, { "epoch": 1.34, "grad_norm": 6.088522434234619, "learning_rate": 1.2416556795853535e-06, "loss": 0.5715, "step": 9210 }, { "epoch": 1.34, "grad_norm": 7.558117389678955, "learning_rate": 1.241502085147767e-06, "loss": 0.5598, "step": 9211 }, { "epoch": 1.34, "grad_norm": 7.040401935577393, "learning_rate": 1.2413484846597376e-06, "loss": 0.5157, "step": 9212 }, { "epoch": 1.34, "grad_norm": 6.365400314331055, "learning_rate": 1.241194878125115e-06, "loss": 0.6298, "step": 9213 }, { "epoch": 1.34, "grad_norm": 6.531989097595215, "learning_rate": 1.241041265547746e-06, "loss": 0.5608, "step": 9214 }, { "epoch": 1.34, "grad_norm": 6.091947555541992, "learning_rate": 1.2408876469314797e-06, "loss": 0.5196, "step": 9215 }, { "epoch": 1.34, "grad_norm": 7.501274108886719, "learning_rate": 1.240734022280165e-06, "loss": 0.5332, "step": 9216 }, { "epoch": 1.34, "grad_norm": 6.29506254196167, "learning_rate": 1.2405803915976503e-06, "loss": 0.5021, "step": 9217 }, { "epoch": 1.34, "grad_norm": 6.507394313812256, "learning_rate": 1.2404267548877848e-06, "loss": 0.5649, "step": 9218 }, { "epoch": 1.34, "grad_norm": 6.000511646270752, "learning_rate": 1.2402731121544178e-06, "loss": 0.5291, "step": 9219 }, { "epoch": 1.34, "grad_norm": 6.618308067321777, "learning_rate": 1.2401194634013983e-06, "loss": 0.5414, "step": 9220 }, { "epoch": 1.34, "grad_norm": 6.358127593994141, "learning_rate": 1.2399658086325761e-06, "loss": 0.4919, "step": 9221 }, { "epoch": 1.34, "grad_norm": 6.530317306518555, "learning_rate": 1.2398121478518001e-06, "loss": 0.5638, "step": 9222 }, { "epoch": 1.34, "grad_norm": 6.10461950302124, "learning_rate": 1.2396584810629204e-06, "loss": 0.5729, "step": 9223 }, { "epoch": 1.34, "grad_norm": 6.646990776062012, "learning_rate": 1.239504808269787e-06, "loss": 0.5399, "step": 9224 }, { "epoch": 1.34, "grad_norm": 6.2593278884887695, "learning_rate": 1.2393511294762496e-06, "loss": 0.5148, "step": 9225 }, { "epoch": 1.34, "grad_norm": 6.428503036499023, "learning_rate": 1.239197444686159e-06, "loss": 0.5954, "step": 9226 }, { "epoch": 1.34, "grad_norm": 6.047351360321045, "learning_rate": 1.2390437539033644e-06, "loss": 0.5311, "step": 9227 }, { "epoch": 1.34, "grad_norm": 6.0998148918151855, "learning_rate": 1.2388900571317174e-06, "loss": 0.5537, "step": 9228 }, { "epoch": 1.34, "grad_norm": 6.85917854309082, "learning_rate": 1.2387363543750681e-06, "loss": 0.5855, "step": 9229 }, { "epoch": 1.34, "grad_norm": 6.529352188110352, "learning_rate": 1.2385826456372673e-06, "loss": 0.5281, "step": 9230 }, { "epoch": 1.34, "grad_norm": 6.863232135772705, "learning_rate": 1.238428930922166e-06, "loss": 0.5007, "step": 9231 }, { "epoch": 1.34, "grad_norm": 6.622470378875732, "learning_rate": 1.2382752102336152e-06, "loss": 0.5333, "step": 9232 }, { "epoch": 1.34, "grad_norm": 6.845686912536621, "learning_rate": 1.238121483575466e-06, "loss": 0.5383, "step": 9233 }, { "epoch": 1.34, "grad_norm": 5.833419322967529, "learning_rate": 1.2379677509515705e-06, "loss": 0.5195, "step": 9234 }, { "epoch": 1.34, "grad_norm": 6.55613899230957, "learning_rate": 1.2378140123657791e-06, "loss": 0.591, "step": 9235 }, { "epoch": 1.34, "grad_norm": 8.2700777053833, "learning_rate": 1.2376602678219443e-06, "loss": 0.6193, "step": 9236 }, { "epoch": 1.34, "grad_norm": 6.274471759796143, "learning_rate": 1.2375065173239177e-06, "loss": 0.5819, "step": 9237 }, { "epoch": 1.34, "grad_norm": 7.107819080352783, "learning_rate": 1.237352760875551e-06, "loss": 0.6284, "step": 9238 }, { "epoch": 1.34, "grad_norm": 6.53269624710083, "learning_rate": 1.2371989984806964e-06, "loss": 0.5095, "step": 9239 }, { "epoch": 1.34, "grad_norm": 6.609547138214111, "learning_rate": 1.2370452301432067e-06, "loss": 0.4939, "step": 9240 }, { "epoch": 1.34, "grad_norm": 6.694978713989258, "learning_rate": 1.236891455866934e-06, "loss": 0.603, "step": 9241 }, { "epoch": 1.34, "grad_norm": 7.5086517333984375, "learning_rate": 1.2367376756557305e-06, "loss": 0.6026, "step": 9242 }, { "epoch": 1.34, "grad_norm": 6.524929046630859, "learning_rate": 1.236583889513449e-06, "loss": 0.5238, "step": 9243 }, { "epoch": 1.34, "grad_norm": 6.561086654663086, "learning_rate": 1.236430097443943e-06, "loss": 0.5152, "step": 9244 }, { "epoch": 1.34, "grad_norm": 8.062527656555176, "learning_rate": 1.2362762994510648e-06, "loss": 0.5253, "step": 9245 }, { "epoch": 1.34, "grad_norm": 6.102034091949463, "learning_rate": 1.2361224955386677e-06, "loss": 0.504, "step": 9246 }, { "epoch": 1.34, "grad_norm": 6.367722988128662, "learning_rate": 1.2359686857106053e-06, "loss": 0.5206, "step": 9247 }, { "epoch": 1.34, "grad_norm": 6.298595905303955, "learning_rate": 1.235814869970731e-06, "loss": 0.5185, "step": 9248 }, { "epoch": 1.34, "grad_norm": 6.53993034362793, "learning_rate": 1.235661048322898e-06, "loss": 0.5574, "step": 9249 }, { "epoch": 1.34, "grad_norm": 6.505997657775879, "learning_rate": 1.2355072207709607e-06, "loss": 0.5461, "step": 9250 }, { "epoch": 1.34, "grad_norm": 6.654128551483154, "learning_rate": 1.2353533873187722e-06, "loss": 0.5304, "step": 9251 }, { "epoch": 1.34, "grad_norm": 6.084795951843262, "learning_rate": 1.2351995479701873e-06, "loss": 0.5078, "step": 9252 }, { "epoch": 1.34, "grad_norm": 6.2442169189453125, "learning_rate": 1.23504570272906e-06, "loss": 0.5423, "step": 9253 }, { "epoch": 1.34, "grad_norm": 7.13163423538208, "learning_rate": 1.2348918515992443e-06, "loss": 0.595, "step": 9254 }, { "epoch": 1.34, "grad_norm": 6.191919326782227, "learning_rate": 1.234737994584595e-06, "loss": 0.553, "step": 9255 }, { "epoch": 1.34, "grad_norm": 6.0317463874816895, "learning_rate": 1.2345841316889668e-06, "loss": 0.553, "step": 9256 }, { "epoch": 1.34, "grad_norm": 8.220463752746582, "learning_rate": 1.234430262916214e-06, "loss": 0.5525, "step": 9257 }, { "epoch": 1.34, "grad_norm": 6.94448184967041, "learning_rate": 1.2342763882701922e-06, "loss": 0.5961, "step": 9258 }, { "epoch": 1.34, "grad_norm": 5.872767448425293, "learning_rate": 1.2341225077547559e-06, "loss": 0.5633, "step": 9259 }, { "epoch": 1.34, "grad_norm": 5.97345495223999, "learning_rate": 1.2339686213737607e-06, "loss": 0.5151, "step": 9260 }, { "epoch": 1.34, "grad_norm": 6.614163875579834, "learning_rate": 1.2338147291310617e-06, "loss": 0.5377, "step": 9261 }, { "epoch": 1.34, "grad_norm": 5.71530818939209, "learning_rate": 1.233660831030515e-06, "loss": 0.4993, "step": 9262 }, { "epoch": 1.34, "grad_norm": 6.585021018981934, "learning_rate": 1.2335069270759754e-06, "loss": 0.5261, "step": 9263 }, { "epoch": 1.34, "grad_norm": 6.417051315307617, "learning_rate": 1.2333530172712994e-06, "loss": 0.5188, "step": 9264 }, { "epoch": 1.34, "grad_norm": 6.454793930053711, "learning_rate": 1.2331991016203425e-06, "loss": 0.5567, "step": 9265 }, { "epoch": 1.34, "grad_norm": 6.872431755065918, "learning_rate": 1.233045180126961e-06, "loss": 0.5517, "step": 9266 }, { "epoch": 1.34, "grad_norm": 6.615525245666504, "learning_rate": 1.2328912527950113e-06, "loss": 0.4739, "step": 9267 }, { "epoch": 1.34, "grad_norm": 6.230515003204346, "learning_rate": 1.2327373196283496e-06, "loss": 0.507, "step": 9268 }, { "epoch": 1.34, "grad_norm": 6.863838195800781, "learning_rate": 1.2325833806308328e-06, "loss": 0.5659, "step": 9269 }, { "epoch": 1.35, "grad_norm": 6.64318323135376, "learning_rate": 1.232429435806317e-06, "loss": 0.5729, "step": 9270 }, { "epoch": 1.35, "grad_norm": 5.757368564605713, "learning_rate": 1.2322754851586594e-06, "loss": 0.5136, "step": 9271 }, { "epoch": 1.35, "grad_norm": 7.145184516906738, "learning_rate": 1.232121528691717e-06, "loss": 0.5573, "step": 9272 }, { "epoch": 1.35, "grad_norm": 6.125672817230225, "learning_rate": 1.2319675664093468e-06, "loss": 0.5045, "step": 9273 }, { "epoch": 1.35, "grad_norm": 7.181973934173584, "learning_rate": 1.231813598315406e-06, "loss": 0.4565, "step": 9274 }, { "epoch": 1.35, "grad_norm": 6.303185939788818, "learning_rate": 1.2316596244137526e-06, "loss": 0.5237, "step": 9275 }, { "epoch": 1.35, "grad_norm": 5.955331325531006, "learning_rate": 1.2315056447082436e-06, "loss": 0.508, "step": 9276 }, { "epoch": 1.35, "grad_norm": 6.1044602394104, "learning_rate": 1.2313516592027361e-06, "loss": 0.5615, "step": 9277 }, { "epoch": 1.35, "grad_norm": 6.3576273918151855, "learning_rate": 1.2311976679010894e-06, "loss": 0.479, "step": 9278 }, { "epoch": 1.35, "grad_norm": 7.095841407775879, "learning_rate": 1.2310436708071605e-06, "loss": 0.5599, "step": 9279 }, { "epoch": 1.35, "grad_norm": 5.224973678588867, "learning_rate": 1.230889667924808e-06, "loss": 0.5032, "step": 9280 }, { "epoch": 1.35, "grad_norm": 5.774054527282715, "learning_rate": 1.23073565925789e-06, "loss": 0.5354, "step": 9281 }, { "epoch": 1.35, "grad_norm": 5.949161052703857, "learning_rate": 1.230581644810265e-06, "loss": 0.551, "step": 9282 }, { "epoch": 1.35, "grad_norm": 6.1238627433776855, "learning_rate": 1.2304276245857914e-06, "loss": 0.5378, "step": 9283 }, { "epoch": 1.35, "grad_norm": 6.397077560424805, "learning_rate": 1.2302735985883282e-06, "loss": 0.5649, "step": 9284 }, { "epoch": 1.35, "grad_norm": 6.671841144561768, "learning_rate": 1.230119566821734e-06, "loss": 0.5049, "step": 9285 }, { "epoch": 1.35, "grad_norm": 6.871366024017334, "learning_rate": 1.229965529289868e-06, "loss": 0.6216, "step": 9286 }, { "epoch": 1.35, "grad_norm": 6.553562641143799, "learning_rate": 1.2298114859965894e-06, "loss": 0.5816, "step": 9287 }, { "epoch": 1.35, "grad_norm": 6.910578727722168, "learning_rate": 1.2296574369457572e-06, "loss": 0.5952, "step": 9288 }, { "epoch": 1.35, "grad_norm": 6.825206756591797, "learning_rate": 1.2295033821412316e-06, "loss": 0.519, "step": 9289 }, { "epoch": 1.35, "grad_norm": 6.841127872467041, "learning_rate": 1.2293493215868713e-06, "loss": 0.5089, "step": 9290 }, { "epoch": 1.35, "grad_norm": 6.1584792137146, "learning_rate": 1.2291952552865365e-06, "loss": 0.5543, "step": 9291 }, { "epoch": 1.35, "grad_norm": 6.052330493927002, "learning_rate": 1.2290411832440867e-06, "loss": 0.5582, "step": 9292 }, { "epoch": 1.35, "grad_norm": 6.912161350250244, "learning_rate": 1.2288871054633824e-06, "loss": 0.5518, "step": 9293 }, { "epoch": 1.35, "grad_norm": 8.110806465148926, "learning_rate": 1.2287330219482837e-06, "loss": 0.5617, "step": 9294 }, { "epoch": 1.35, "grad_norm": 6.503483295440674, "learning_rate": 1.2285789327026509e-06, "loss": 0.5705, "step": 9295 }, { "epoch": 1.35, "grad_norm": 6.179825782775879, "learning_rate": 1.228424837730344e-06, "loss": 0.5693, "step": 9296 }, { "epoch": 1.35, "grad_norm": 5.973986625671387, "learning_rate": 1.228270737035224e-06, "loss": 0.5222, "step": 9297 }, { "epoch": 1.35, "grad_norm": 6.746606349945068, "learning_rate": 1.2281166306211516e-06, "loss": 0.5909, "step": 9298 }, { "epoch": 1.35, "grad_norm": 6.290578842163086, "learning_rate": 1.2279625184919878e-06, "loss": 0.4922, "step": 9299 }, { "epoch": 1.35, "grad_norm": 5.837887287139893, "learning_rate": 1.2278084006515933e-06, "loss": 0.503, "step": 9300 }, { "epoch": 1.35, "grad_norm": 7.6126708984375, "learning_rate": 1.2276542771038297e-06, "loss": 0.5742, "step": 9301 }, { "epoch": 1.35, "grad_norm": 5.919590473175049, "learning_rate": 1.227500147852558e-06, "loss": 0.5047, "step": 9302 }, { "epoch": 1.35, "grad_norm": 6.573851585388184, "learning_rate": 1.2273460129016398e-06, "loss": 0.5613, "step": 9303 }, { "epoch": 1.35, "grad_norm": 7.206540584564209, "learning_rate": 1.2271918722549364e-06, "loss": 0.5141, "step": 9304 }, { "epoch": 1.35, "grad_norm": 6.13516092300415, "learning_rate": 1.2270377259163099e-06, "loss": 0.4928, "step": 9305 }, { "epoch": 1.35, "grad_norm": 6.090640544891357, "learning_rate": 1.226883573889622e-06, "loss": 0.5445, "step": 9306 }, { "epoch": 1.35, "grad_norm": 6.206185340881348, "learning_rate": 1.2267294161787352e-06, "loss": 0.523, "step": 9307 }, { "epoch": 1.35, "grad_norm": 6.9003214836120605, "learning_rate": 1.226575252787511e-06, "loss": 0.593, "step": 9308 }, { "epoch": 1.35, "grad_norm": 6.363449573516846, "learning_rate": 1.226421083719812e-06, "loss": 0.5796, "step": 9309 }, { "epoch": 1.35, "grad_norm": 6.111989974975586, "learning_rate": 1.2262669089795007e-06, "loss": 0.5422, "step": 9310 }, { "epoch": 1.35, "grad_norm": 6.21102237701416, "learning_rate": 1.2261127285704396e-06, "loss": 0.4884, "step": 9311 }, { "epoch": 1.35, "grad_norm": 6.002488136291504, "learning_rate": 1.2259585424964913e-06, "loss": 0.4337, "step": 9312 }, { "epoch": 1.35, "grad_norm": 6.462878227233887, "learning_rate": 1.225804350761519e-06, "loss": 0.6102, "step": 9313 }, { "epoch": 1.35, "grad_norm": 7.1735334396362305, "learning_rate": 1.2256501533693857e-06, "loss": 0.4796, "step": 9314 }, { "epoch": 1.35, "grad_norm": 6.646066188812256, "learning_rate": 1.2254959503239542e-06, "loss": 0.5462, "step": 9315 }, { "epoch": 1.35, "grad_norm": 6.398030757904053, "learning_rate": 1.2253417416290882e-06, "loss": 0.5796, "step": 9316 }, { "epoch": 1.35, "grad_norm": 5.844366073608398, "learning_rate": 1.2251875272886514e-06, "loss": 0.5521, "step": 9317 }, { "epoch": 1.35, "grad_norm": 6.855990409851074, "learning_rate": 1.2250333073065064e-06, "loss": 0.6001, "step": 9318 }, { "epoch": 1.35, "grad_norm": 6.728784561157227, "learning_rate": 1.2248790816865178e-06, "loss": 0.5392, "step": 9319 }, { "epoch": 1.35, "grad_norm": 6.502913951873779, "learning_rate": 1.224724850432549e-06, "loss": 0.4948, "step": 9320 }, { "epoch": 1.35, "grad_norm": 6.4792375564575195, "learning_rate": 1.2245706135484643e-06, "loss": 0.5, "step": 9321 }, { "epoch": 1.35, "grad_norm": 6.27724552154541, "learning_rate": 1.224416371038128e-06, "loss": 0.5697, "step": 9322 }, { "epoch": 1.35, "grad_norm": 5.640323162078857, "learning_rate": 1.2242621229054038e-06, "loss": 0.5246, "step": 9323 }, { "epoch": 1.35, "grad_norm": 6.739475727081299, "learning_rate": 1.2241078691541567e-06, "loss": 0.5186, "step": 9324 }, { "epoch": 1.35, "grad_norm": 6.38823938369751, "learning_rate": 1.2239536097882512e-06, "loss": 0.525, "step": 9325 }, { "epoch": 1.35, "grad_norm": 6.866274833679199, "learning_rate": 1.2237993448115518e-06, "loss": 0.5575, "step": 9326 }, { "epoch": 1.35, "grad_norm": 5.9201250076293945, "learning_rate": 1.2236450742279235e-06, "loss": 0.5566, "step": 9327 }, { "epoch": 1.35, "grad_norm": 6.063043117523193, "learning_rate": 1.223490798041231e-06, "loss": 0.4691, "step": 9328 }, { "epoch": 1.35, "grad_norm": 5.96660852432251, "learning_rate": 1.2233365162553395e-06, "loss": 0.5688, "step": 9329 }, { "epoch": 1.35, "grad_norm": 6.529111862182617, "learning_rate": 1.223182228874115e-06, "loss": 0.568, "step": 9330 }, { "epoch": 1.35, "grad_norm": 6.077782154083252, "learning_rate": 1.2230279359014222e-06, "loss": 0.4918, "step": 9331 }, { "epoch": 1.35, "grad_norm": 7.125422477722168, "learning_rate": 1.2228736373411268e-06, "loss": 0.4965, "step": 9332 }, { "epoch": 1.35, "grad_norm": 6.33922004699707, "learning_rate": 1.2227193331970947e-06, "loss": 0.5811, "step": 9333 }, { "epoch": 1.35, "grad_norm": 7.648461818695068, "learning_rate": 1.2225650234731914e-06, "loss": 0.5692, "step": 9334 }, { "epoch": 1.35, "grad_norm": 6.851508617401123, "learning_rate": 1.222410708173283e-06, "loss": 0.5223, "step": 9335 }, { "epoch": 1.35, "grad_norm": 6.6029767990112305, "learning_rate": 1.2222563873012358e-06, "loss": 0.557, "step": 9336 }, { "epoch": 1.35, "grad_norm": 6.751140594482422, "learning_rate": 1.222102060860916e-06, "loss": 0.5868, "step": 9337 }, { "epoch": 1.35, "grad_norm": 6.247158527374268, "learning_rate": 1.22194772885619e-06, "loss": 0.5265, "step": 9338 }, { "epoch": 1.36, "grad_norm": 7.094658851623535, "learning_rate": 1.2217933912909244e-06, "loss": 0.4956, "step": 9339 }, { "epoch": 1.36, "grad_norm": 6.5777482986450195, "learning_rate": 1.2216390481689855e-06, "loss": 0.5465, "step": 9340 }, { "epoch": 1.36, "grad_norm": 7.448022842407227, "learning_rate": 1.2214846994942405e-06, "loss": 0.5618, "step": 9341 }, { "epoch": 1.36, "grad_norm": 6.855786323547363, "learning_rate": 1.2213303452705562e-06, "loss": 0.5383, "step": 9342 }, { "epoch": 1.36, "grad_norm": 6.749046325683594, "learning_rate": 1.2211759855017996e-06, "loss": 0.501, "step": 9343 }, { "epoch": 1.36, "grad_norm": 6.39253568649292, "learning_rate": 1.221021620191838e-06, "loss": 0.5313, "step": 9344 }, { "epoch": 1.36, "grad_norm": 6.594376087188721, "learning_rate": 1.220867249344539e-06, "loss": 0.5675, "step": 9345 }, { "epoch": 1.36, "grad_norm": 6.893136978149414, "learning_rate": 1.22071287296377e-06, "loss": 0.5019, "step": 9346 }, { "epoch": 1.36, "grad_norm": 6.498453617095947, "learning_rate": 1.2205584910533985e-06, "loss": 0.5964, "step": 9347 }, { "epoch": 1.36, "grad_norm": 6.406711101531982, "learning_rate": 1.2204041036172924e-06, "loss": 0.5075, "step": 9348 }, { "epoch": 1.36, "grad_norm": 6.404629230499268, "learning_rate": 1.2202497106593195e-06, "loss": 0.4843, "step": 9349 }, { "epoch": 1.36, "grad_norm": 6.189347267150879, "learning_rate": 1.2200953121833482e-06, "loss": 0.5541, "step": 9350 }, { "epoch": 1.36, "grad_norm": 6.281065940856934, "learning_rate": 1.219940908193246e-06, "loss": 0.4887, "step": 9351 }, { "epoch": 1.36, "grad_norm": 6.095775604248047, "learning_rate": 1.2197864986928822e-06, "loss": 0.5397, "step": 9352 }, { "epoch": 1.36, "grad_norm": 6.659062385559082, "learning_rate": 1.2196320836861246e-06, "loss": 0.5637, "step": 9353 }, { "epoch": 1.36, "grad_norm": 6.283161163330078, "learning_rate": 1.2194776631768422e-06, "loss": 0.57, "step": 9354 }, { "epoch": 1.36, "grad_norm": 6.109605312347412, "learning_rate": 1.219323237168903e-06, "loss": 0.5162, "step": 9355 }, { "epoch": 1.36, "grad_norm": 6.266476154327393, "learning_rate": 1.219168805666177e-06, "loss": 0.5511, "step": 9356 }, { "epoch": 1.36, "grad_norm": 6.190852165222168, "learning_rate": 1.2190143686725324e-06, "loss": 0.5351, "step": 9357 }, { "epoch": 1.36, "grad_norm": 5.854244709014893, "learning_rate": 1.2188599261918388e-06, "loss": 0.4834, "step": 9358 }, { "epoch": 1.36, "grad_norm": 6.586728096008301, "learning_rate": 1.2187054782279656e-06, "loss": 0.6421, "step": 9359 }, { "epoch": 1.36, "grad_norm": 6.5714850425720215, "learning_rate": 1.2185510247847816e-06, "loss": 0.5523, "step": 9360 }, { "epoch": 1.36, "grad_norm": 7.016673564910889, "learning_rate": 1.218396565866157e-06, "loss": 0.546, "step": 9361 }, { "epoch": 1.36, "grad_norm": 6.893200397491455, "learning_rate": 1.2182421014759611e-06, "loss": 0.6103, "step": 9362 }, { "epoch": 1.36, "grad_norm": 6.457516670227051, "learning_rate": 1.2180876316180642e-06, "loss": 0.544, "step": 9363 }, { "epoch": 1.36, "grad_norm": 6.550778865814209, "learning_rate": 1.2179331562963358e-06, "loss": 0.5474, "step": 9364 }, { "epoch": 1.36, "grad_norm": 6.624397277832031, "learning_rate": 1.2177786755146465e-06, "loss": 0.5131, "step": 9365 }, { "epoch": 1.36, "grad_norm": 11.49870491027832, "learning_rate": 1.2176241892768665e-06, "loss": 0.5069, "step": 9366 }, { "epoch": 1.36, "grad_norm": 6.686495304107666, "learning_rate": 1.2174696975868654e-06, "loss": 0.5568, "step": 9367 }, { "epoch": 1.36, "grad_norm": 5.673189163208008, "learning_rate": 1.2173152004485148e-06, "loss": 0.5109, "step": 9368 }, { "epoch": 1.36, "grad_norm": 5.608645439147949, "learning_rate": 1.217160697865685e-06, "loss": 0.5109, "step": 9369 }, { "epoch": 1.36, "grad_norm": 6.536577224731445, "learning_rate": 1.2170061898422466e-06, "loss": 0.5531, "step": 9370 }, { "epoch": 1.36, "grad_norm": 6.434295177459717, "learning_rate": 1.216851676382071e-06, "loss": 0.5999, "step": 9371 }, { "epoch": 1.36, "grad_norm": 6.46467399597168, "learning_rate": 1.2166971574890286e-06, "loss": 0.6015, "step": 9372 }, { "epoch": 1.36, "grad_norm": 6.739181995391846, "learning_rate": 1.2165426331669912e-06, "loss": 0.522, "step": 9373 }, { "epoch": 1.36, "grad_norm": 6.587144374847412, "learning_rate": 1.21638810341983e-06, "loss": 0.5395, "step": 9374 }, { "epoch": 1.36, "grad_norm": 6.641557693481445, "learning_rate": 1.2162335682514164e-06, "loss": 0.6024, "step": 9375 }, { "epoch": 1.36, "grad_norm": 6.315204620361328, "learning_rate": 1.2160790276656221e-06, "loss": 0.5064, "step": 9376 }, { "epoch": 1.36, "grad_norm": 6.135184288024902, "learning_rate": 1.2159244816663187e-06, "loss": 0.5654, "step": 9377 }, { "epoch": 1.36, "grad_norm": 6.610497951507568, "learning_rate": 1.2157699302573784e-06, "loss": 0.579, "step": 9378 }, { "epoch": 1.36, "grad_norm": 6.226792812347412, "learning_rate": 1.2156153734426732e-06, "loss": 0.5132, "step": 9379 }, { "epoch": 1.36, "grad_norm": 6.487077713012695, "learning_rate": 1.215460811226075e-06, "loss": 0.5224, "step": 9380 }, { "epoch": 1.36, "grad_norm": 6.563910484313965, "learning_rate": 1.2153062436114563e-06, "loss": 0.5896, "step": 9381 }, { "epoch": 1.36, "grad_norm": 6.538657188415527, "learning_rate": 1.2151516706026896e-06, "loss": 0.5837, "step": 9382 }, { "epoch": 1.36, "grad_norm": 5.856820583343506, "learning_rate": 1.2149970922036473e-06, "loss": 0.5706, "step": 9383 }, { "epoch": 1.36, "grad_norm": 5.707991123199463, "learning_rate": 1.2148425084182022e-06, "loss": 0.4987, "step": 9384 }, { "epoch": 1.36, "grad_norm": 5.697444915771484, "learning_rate": 1.214687919250227e-06, "loss": 0.4205, "step": 9385 }, { "epoch": 1.36, "grad_norm": 6.088843822479248, "learning_rate": 1.214533324703595e-06, "loss": 0.5175, "step": 9386 }, { "epoch": 1.36, "grad_norm": 6.695834159851074, "learning_rate": 1.2143787247821793e-06, "loss": 0.471, "step": 9387 }, { "epoch": 1.36, "grad_norm": 6.87791109085083, "learning_rate": 1.2142241194898526e-06, "loss": 0.5535, "step": 9388 }, { "epoch": 1.36, "grad_norm": 6.653763294219971, "learning_rate": 1.2140695088304888e-06, "loss": 0.5808, "step": 9389 }, { "epoch": 1.36, "grad_norm": 6.867107391357422, "learning_rate": 1.2139148928079612e-06, "loss": 0.5751, "step": 9390 }, { "epoch": 1.36, "grad_norm": 6.846469402313232, "learning_rate": 1.2137602714261437e-06, "loss": 0.6709, "step": 9391 }, { "epoch": 1.36, "grad_norm": 5.83657169342041, "learning_rate": 1.2136056446889097e-06, "loss": 0.5366, "step": 9392 }, { "epoch": 1.36, "grad_norm": 7.0522589683532715, "learning_rate": 1.2134510126001335e-06, "loss": 0.5843, "step": 9393 }, { "epoch": 1.36, "grad_norm": 6.392883777618408, "learning_rate": 1.2132963751636893e-06, "loss": 0.5659, "step": 9394 }, { "epoch": 1.36, "grad_norm": 6.843387603759766, "learning_rate": 1.2131417323834508e-06, "loss": 0.5184, "step": 9395 }, { "epoch": 1.36, "grad_norm": 7.2277069091796875, "learning_rate": 1.2129870842632924e-06, "loss": 0.5622, "step": 9396 }, { "epoch": 1.36, "grad_norm": 7.2108025550842285, "learning_rate": 1.2128324308070885e-06, "loss": 0.6724, "step": 9397 }, { "epoch": 1.36, "grad_norm": 6.027571201324463, "learning_rate": 1.212677772018714e-06, "loss": 0.4963, "step": 9398 }, { "epoch": 1.36, "grad_norm": 6.611382007598877, "learning_rate": 1.2125231079020434e-06, "loss": 0.5793, "step": 9399 }, { "epoch": 1.36, "grad_norm": 6.696995258331299, "learning_rate": 1.2123684384609518e-06, "loss": 0.5584, "step": 9400 }, { "epoch": 1.36, "grad_norm": 6.868701934814453, "learning_rate": 1.212213763699314e-06, "loss": 0.5182, "step": 9401 }, { "epoch": 1.36, "grad_norm": 6.200364112854004, "learning_rate": 1.2120590836210053e-06, "loss": 0.4895, "step": 9402 }, { "epoch": 1.36, "grad_norm": 5.678405284881592, "learning_rate": 1.211904398229901e-06, "loss": 0.4783, "step": 9403 }, { "epoch": 1.36, "grad_norm": 6.894890308380127, "learning_rate": 1.2117497075298756e-06, "loss": 0.5257, "step": 9404 }, { "epoch": 1.36, "grad_norm": 6.801486968994141, "learning_rate": 1.211595011524806e-06, "loss": 0.6612, "step": 9405 }, { "epoch": 1.36, "grad_norm": 6.30773401260376, "learning_rate": 1.2114403102185668e-06, "loss": 0.5356, "step": 9406 }, { "epoch": 1.36, "grad_norm": 6.8189592361450195, "learning_rate": 1.2112856036150344e-06, "loss": 0.5281, "step": 9407 }, { "epoch": 1.37, "grad_norm": 5.609860420227051, "learning_rate": 1.2111308917180843e-06, "loss": 0.5776, "step": 9408 }, { "epoch": 1.37, "grad_norm": 7.216665744781494, "learning_rate": 1.2109761745315927e-06, "loss": 0.5937, "step": 9409 }, { "epoch": 1.37, "grad_norm": 6.088231086730957, "learning_rate": 1.210821452059436e-06, "loss": 0.5085, "step": 9410 }, { "epoch": 1.37, "grad_norm": 6.653459548950195, "learning_rate": 1.2106667243054904e-06, "loss": 0.5835, "step": 9411 }, { "epoch": 1.37, "grad_norm": 5.694510459899902, "learning_rate": 1.2105119912736324e-06, "loss": 0.522, "step": 9412 }, { "epoch": 1.37, "grad_norm": 6.426827430725098, "learning_rate": 1.2103572529677383e-06, "loss": 0.6016, "step": 9413 }, { "epoch": 1.37, "grad_norm": 6.955657958984375, "learning_rate": 1.2102025093916854e-06, "loss": 0.6617, "step": 9414 }, { "epoch": 1.37, "grad_norm": 5.834656715393066, "learning_rate": 1.21004776054935e-06, "loss": 0.4539, "step": 9415 }, { "epoch": 1.37, "grad_norm": 5.816635608673096, "learning_rate": 1.209893006444609e-06, "loss": 0.5072, "step": 9416 }, { "epoch": 1.37, "grad_norm": 6.590517520904541, "learning_rate": 1.20973824708134e-06, "loss": 0.5973, "step": 9417 }, { "epoch": 1.37, "grad_norm": 5.783984184265137, "learning_rate": 1.2095834824634196e-06, "loss": 0.4911, "step": 9418 }, { "epoch": 1.37, "grad_norm": 6.498347282409668, "learning_rate": 1.209428712594726e-06, "loss": 0.548, "step": 9419 }, { "epoch": 1.37, "grad_norm": 6.568902492523193, "learning_rate": 1.2092739374791363e-06, "loss": 0.5534, "step": 9420 }, { "epoch": 1.37, "grad_norm": 7.178332805633545, "learning_rate": 1.2091191571205278e-06, "loss": 0.5715, "step": 9421 }, { "epoch": 1.37, "grad_norm": 6.585375785827637, "learning_rate": 1.2089643715227792e-06, "loss": 0.4983, "step": 9422 }, { "epoch": 1.37, "grad_norm": 6.268295764923096, "learning_rate": 1.2088095806897673e-06, "loss": 0.5449, "step": 9423 }, { "epoch": 1.37, "grad_norm": 6.346785068511963, "learning_rate": 1.208654784625371e-06, "loss": 0.5524, "step": 9424 }, { "epoch": 1.37, "grad_norm": 7.202087879180908, "learning_rate": 1.2084999833334678e-06, "loss": 0.6532, "step": 9425 }, { "epoch": 1.37, "grad_norm": 6.413900852203369, "learning_rate": 1.2083451768179362e-06, "loss": 0.5175, "step": 9426 }, { "epoch": 1.37, "grad_norm": 6.277168273925781, "learning_rate": 1.2081903650826548e-06, "loss": 0.6625, "step": 9427 }, { "epoch": 1.37, "grad_norm": 5.861119747161865, "learning_rate": 1.2080355481315024e-06, "loss": 0.5004, "step": 9428 }, { "epoch": 1.37, "grad_norm": 6.9838337898254395, "learning_rate": 1.2078807259683573e-06, "loss": 0.5887, "step": 9429 }, { "epoch": 1.37, "grad_norm": 6.393823146820068, "learning_rate": 1.2077258985970981e-06, "loss": 0.5669, "step": 9430 }, { "epoch": 1.37, "grad_norm": 5.874136924743652, "learning_rate": 1.2075710660216042e-06, "loss": 0.5013, "step": 9431 }, { "epoch": 1.37, "grad_norm": 6.102438449859619, "learning_rate": 1.2074162282457542e-06, "loss": 0.5759, "step": 9432 }, { "epoch": 1.37, "grad_norm": 6.1286516189575195, "learning_rate": 1.2072613852734278e-06, "loss": 0.5379, "step": 9433 }, { "epoch": 1.37, "grad_norm": 5.641972541809082, "learning_rate": 1.207106537108504e-06, "loss": 0.5334, "step": 9434 }, { "epoch": 1.37, "grad_norm": 6.770640850067139, "learning_rate": 1.206951683754863e-06, "loss": 0.5259, "step": 9435 }, { "epoch": 1.37, "grad_norm": 7.3850531578063965, "learning_rate": 1.2067968252163833e-06, "loss": 0.5765, "step": 9436 }, { "epoch": 1.37, "grad_norm": 6.224325180053711, "learning_rate": 1.2066419614969454e-06, "loss": 0.6141, "step": 9437 }, { "epoch": 1.37, "grad_norm": 6.664273262023926, "learning_rate": 1.2064870926004288e-06, "loss": 0.6045, "step": 9438 }, { "epoch": 1.37, "grad_norm": 6.262876510620117, "learning_rate": 1.2063322185307136e-06, "loss": 0.5318, "step": 9439 }, { "epoch": 1.37, "grad_norm": 6.587736129760742, "learning_rate": 1.20617733929168e-06, "loss": 0.532, "step": 9440 }, { "epoch": 1.37, "grad_norm": 5.626044273376465, "learning_rate": 1.2060224548872083e-06, "loss": 0.4404, "step": 9441 }, { "epoch": 1.37, "grad_norm": 7.512197494506836, "learning_rate": 1.2058675653211785e-06, "loss": 0.5933, "step": 9442 }, { "epoch": 1.37, "grad_norm": 6.94453763961792, "learning_rate": 1.2057126705974714e-06, "loss": 0.5543, "step": 9443 }, { "epoch": 1.37, "grad_norm": 6.736417770385742, "learning_rate": 1.2055577707199675e-06, "loss": 0.5677, "step": 9444 }, { "epoch": 1.37, "grad_norm": 5.97597074508667, "learning_rate": 1.205402865692548e-06, "loss": 0.5196, "step": 9445 }, { "epoch": 1.37, "grad_norm": 5.843594551086426, "learning_rate": 1.205247955519093e-06, "loss": 0.5242, "step": 9446 }, { "epoch": 1.37, "grad_norm": 6.250687599182129, "learning_rate": 1.2050930402034843e-06, "loss": 0.5063, "step": 9447 }, { "epoch": 1.37, "grad_norm": 6.777950286865234, "learning_rate": 1.2049381197496026e-06, "loss": 0.559, "step": 9448 }, { "epoch": 1.37, "grad_norm": 5.864997386932373, "learning_rate": 1.2047831941613292e-06, "loss": 0.4556, "step": 9449 }, { "epoch": 1.37, "grad_norm": 6.016089916229248, "learning_rate": 1.2046282634425457e-06, "loss": 0.5533, "step": 9450 }, { "epoch": 1.37, "grad_norm": 7.262677192687988, "learning_rate": 1.2044733275971336e-06, "loss": 0.5703, "step": 9451 }, { "epoch": 1.37, "grad_norm": 6.248956680297852, "learning_rate": 1.2043183866289745e-06, "loss": 0.5676, "step": 9452 }, { "epoch": 1.37, "grad_norm": 7.116189002990723, "learning_rate": 1.20416344054195e-06, "loss": 0.5487, "step": 9453 }, { "epoch": 1.37, "grad_norm": 6.08431339263916, "learning_rate": 1.2040084893399425e-06, "loss": 0.5106, "step": 9454 }, { "epoch": 1.37, "grad_norm": 7.218909740447998, "learning_rate": 1.2038535330268335e-06, "loss": 0.5802, "step": 9455 }, { "epoch": 1.37, "grad_norm": 6.257230281829834, "learning_rate": 1.2036985716065059e-06, "loss": 0.5361, "step": 9456 }, { "epoch": 1.37, "grad_norm": 7.584728240966797, "learning_rate": 1.2035436050828414e-06, "loss": 0.4923, "step": 9457 }, { "epoch": 1.37, "grad_norm": 6.154011249542236, "learning_rate": 1.2033886334597222e-06, "loss": 0.5174, "step": 9458 }, { "epoch": 1.37, "grad_norm": 6.2602057456970215, "learning_rate": 1.2032336567410317e-06, "loss": 0.4993, "step": 9459 }, { "epoch": 1.37, "grad_norm": 5.938019275665283, "learning_rate": 1.203078674930652e-06, "loss": 0.5301, "step": 9460 }, { "epoch": 1.37, "grad_norm": 7.408723831176758, "learning_rate": 1.2029236880324658e-06, "loss": 0.6149, "step": 9461 }, { "epoch": 1.37, "grad_norm": 6.662847518920898, "learning_rate": 1.2027686960503564e-06, "loss": 0.5594, "step": 9462 }, { "epoch": 1.37, "grad_norm": 6.848250865936279, "learning_rate": 1.2026136989882071e-06, "loss": 0.5948, "step": 9463 }, { "epoch": 1.37, "grad_norm": 6.199655532836914, "learning_rate": 1.2024586968499008e-06, "loss": 0.5096, "step": 9464 }, { "epoch": 1.37, "grad_norm": 6.248237133026123, "learning_rate": 1.2023036896393206e-06, "loss": 0.5172, "step": 9465 }, { "epoch": 1.37, "grad_norm": 6.818877220153809, "learning_rate": 1.2021486773603502e-06, "loss": 0.5042, "step": 9466 }, { "epoch": 1.37, "grad_norm": 6.248847484588623, "learning_rate": 1.2019936600168733e-06, "loss": 0.5883, "step": 9467 }, { "epoch": 1.37, "grad_norm": 6.48443603515625, "learning_rate": 1.2018386376127732e-06, "loss": 0.5236, "step": 9468 }, { "epoch": 1.37, "grad_norm": 6.175313949584961, "learning_rate": 1.2016836101519341e-06, "loss": 0.5113, "step": 9469 }, { "epoch": 1.37, "grad_norm": 5.792010307312012, "learning_rate": 1.20152857763824e-06, "loss": 0.6088, "step": 9470 }, { "epoch": 1.37, "grad_norm": 6.813901424407959, "learning_rate": 1.2013735400755746e-06, "loss": 0.5237, "step": 9471 }, { "epoch": 1.37, "grad_norm": 5.968355655670166, "learning_rate": 1.2012184974678225e-06, "loss": 0.4942, "step": 9472 }, { "epoch": 1.37, "grad_norm": 6.355141639709473, "learning_rate": 1.201063449818868e-06, "loss": 0.5183, "step": 9473 }, { "epoch": 1.37, "grad_norm": 6.793954849243164, "learning_rate": 1.200908397132595e-06, "loss": 0.6244, "step": 9474 }, { "epoch": 1.37, "grad_norm": 6.808029651641846, "learning_rate": 1.200753339412889e-06, "loss": 0.4891, "step": 9475 }, { "epoch": 1.37, "grad_norm": 5.779775619506836, "learning_rate": 1.200598276663634e-06, "loss": 0.5154, "step": 9476 }, { "epoch": 1.38, "grad_norm": 6.434963226318359, "learning_rate": 1.2004432088887152e-06, "loss": 0.5359, "step": 9477 }, { "epoch": 1.38, "grad_norm": 6.834457874298096, "learning_rate": 1.2002881360920176e-06, "loss": 0.5856, "step": 9478 }, { "epoch": 1.38, "grad_norm": 5.988202095031738, "learning_rate": 1.2001330582774262e-06, "loss": 0.5151, "step": 9479 }, { "epoch": 1.38, "grad_norm": 6.4789652824401855, "learning_rate": 1.1999779754488262e-06, "loss": 0.5342, "step": 9480 }, { "epoch": 1.38, "grad_norm": 6.164137363433838, "learning_rate": 1.1998228876101028e-06, "loss": 0.586, "step": 9481 }, { "epoch": 1.38, "grad_norm": 6.002048015594482, "learning_rate": 1.1996677947651418e-06, "loss": 0.5417, "step": 9482 }, { "epoch": 1.38, "grad_norm": 5.968355655670166, "learning_rate": 1.1995126969178284e-06, "loss": 0.5097, "step": 9483 }, { "epoch": 1.38, "grad_norm": 6.731950283050537, "learning_rate": 1.1993575940720486e-06, "loss": 0.5491, "step": 9484 }, { "epoch": 1.38, "grad_norm": 5.788330554962158, "learning_rate": 1.1992024862316885e-06, "loss": 0.5101, "step": 9485 }, { "epoch": 1.38, "grad_norm": 6.1958160400390625, "learning_rate": 1.1990473734006331e-06, "loss": 0.5803, "step": 9486 }, { "epoch": 1.38, "grad_norm": 6.0542988777160645, "learning_rate": 1.19889225558277e-06, "loss": 0.5244, "step": 9487 }, { "epoch": 1.38, "grad_norm": 7.651740074157715, "learning_rate": 1.1987371327819839e-06, "loss": 0.5908, "step": 9488 }, { "epoch": 1.38, "grad_norm": 6.487105369567871, "learning_rate": 1.1985820050021622e-06, "loss": 0.5722, "step": 9489 }, { "epoch": 1.38, "grad_norm": 6.942577362060547, "learning_rate": 1.198426872247191e-06, "loss": 0.6141, "step": 9490 }, { "epoch": 1.38, "grad_norm": 5.800792217254639, "learning_rate": 1.1982717345209572e-06, "loss": 0.551, "step": 9491 }, { "epoch": 1.38, "grad_norm": 6.413824081420898, "learning_rate": 1.198116591827347e-06, "loss": 0.6033, "step": 9492 }, { "epoch": 1.38, "grad_norm": 5.966275691986084, "learning_rate": 1.1979614441702474e-06, "loss": 0.4852, "step": 9493 }, { "epoch": 1.38, "grad_norm": 6.411505222320557, "learning_rate": 1.1978062915535456e-06, "loss": 0.606, "step": 9494 }, { "epoch": 1.38, "grad_norm": 5.956206321716309, "learning_rate": 1.1976511339811283e-06, "loss": 0.4933, "step": 9495 }, { "epoch": 1.38, "grad_norm": 6.5379743576049805, "learning_rate": 1.1974959714568833e-06, "loss": 0.5402, "step": 9496 }, { "epoch": 1.38, "grad_norm": 6.726457595825195, "learning_rate": 1.1973408039846975e-06, "loss": 0.591, "step": 9497 }, { "epoch": 1.38, "grad_norm": 5.789891242980957, "learning_rate": 1.1971856315684587e-06, "loss": 0.518, "step": 9498 }, { "epoch": 1.38, "grad_norm": 5.713914394378662, "learning_rate": 1.1970304542120541e-06, "loss": 0.5008, "step": 9499 }, { "epoch": 1.38, "grad_norm": 5.972805500030518, "learning_rate": 1.1968752719193718e-06, "loss": 0.547, "step": 9500 }, { "epoch": 1.38, "grad_norm": 7.026486396789551, "learning_rate": 1.1967200846942993e-06, "loss": 0.5787, "step": 9501 }, { "epoch": 1.38, "grad_norm": 6.223073959350586, "learning_rate": 1.1965648925407245e-06, "loss": 0.5016, "step": 9502 }, { "epoch": 1.38, "grad_norm": 6.442111015319824, "learning_rate": 1.196409695462536e-06, "loss": 0.5345, "step": 9503 }, { "epoch": 1.38, "grad_norm": 6.395659446716309, "learning_rate": 1.1962544934636217e-06, "loss": 0.5301, "step": 9504 }, { "epoch": 1.38, "grad_norm": 6.100698471069336, "learning_rate": 1.19609928654787e-06, "loss": 0.5259, "step": 9505 }, { "epoch": 1.38, "grad_norm": 6.485293388366699, "learning_rate": 1.1959440747191692e-06, "loss": 0.5196, "step": 9506 }, { "epoch": 1.38, "grad_norm": 6.593997955322266, "learning_rate": 1.195788857981408e-06, "loss": 0.5596, "step": 9507 }, { "epoch": 1.38, "grad_norm": 7.181877613067627, "learning_rate": 1.1956336363384749e-06, "loss": 0.572, "step": 9508 }, { "epoch": 1.38, "grad_norm": 5.746713638305664, "learning_rate": 1.195478409794259e-06, "loss": 0.5227, "step": 9509 }, { "epoch": 1.38, "grad_norm": 5.844759941101074, "learning_rate": 1.1953231783526493e-06, "loss": 0.5316, "step": 9510 }, { "epoch": 1.38, "grad_norm": 5.7658867835998535, "learning_rate": 1.1951679420175346e-06, "loss": 0.4725, "step": 9511 }, { "epoch": 1.38, "grad_norm": 6.413593769073486, "learning_rate": 1.195012700792804e-06, "loss": 0.6341, "step": 9512 }, { "epoch": 1.38, "grad_norm": 6.459867477416992, "learning_rate": 1.1948574546823475e-06, "loss": 0.5382, "step": 9513 }, { "epoch": 1.38, "grad_norm": 5.854360103607178, "learning_rate": 1.1947022036900538e-06, "loss": 0.554, "step": 9514 }, { "epoch": 1.38, "grad_norm": 6.372834205627441, "learning_rate": 1.194546947819813e-06, "loss": 0.495, "step": 9515 }, { "epoch": 1.38, "grad_norm": 6.487029552459717, "learning_rate": 1.1943916870755142e-06, "loss": 0.5615, "step": 9516 }, { "epoch": 1.38, "grad_norm": 6.290046691894531, "learning_rate": 1.1942364214610479e-06, "loss": 0.5077, "step": 9517 }, { "epoch": 1.38, "grad_norm": 6.985540390014648, "learning_rate": 1.1940811509803034e-06, "loss": 0.5367, "step": 9518 }, { "epoch": 1.38, "grad_norm": 6.700373649597168, "learning_rate": 1.1939258756371713e-06, "loss": 0.6058, "step": 9519 }, { "epoch": 1.38, "grad_norm": 7.655779838562012, "learning_rate": 1.1937705954355412e-06, "loss": 0.5102, "step": 9520 }, { "epoch": 1.38, "grad_norm": 6.396766662597656, "learning_rate": 1.1936153103793036e-06, "loss": 0.509, "step": 9521 }, { "epoch": 1.38, "grad_norm": 6.9730610847473145, "learning_rate": 1.1934600204723493e-06, "loss": 0.6074, "step": 9522 }, { "epoch": 1.38, "grad_norm": 6.281669616699219, "learning_rate": 1.1933047257185683e-06, "loss": 0.5647, "step": 9523 }, { "epoch": 1.38, "grad_norm": 6.75586462020874, "learning_rate": 1.1931494261218516e-06, "loss": 0.5666, "step": 9524 }, { "epoch": 1.38, "grad_norm": 6.431787490844727, "learning_rate": 1.1929941216860897e-06, "loss": 0.5316, "step": 9525 }, { "epoch": 1.38, "grad_norm": 6.23868465423584, "learning_rate": 1.1928388124151738e-06, "loss": 0.5492, "step": 9526 }, { "epoch": 1.38, "grad_norm": 6.540699005126953, "learning_rate": 1.192683498312995e-06, "loss": 0.5948, "step": 9527 }, { "epoch": 1.38, "grad_norm": 6.750879764556885, "learning_rate": 1.1925281793834438e-06, "loss": 0.5247, "step": 9528 }, { "epoch": 1.38, "grad_norm": 6.2643842697143555, "learning_rate": 1.192372855630412e-06, "loss": 0.5169, "step": 9529 }, { "epoch": 1.38, "grad_norm": 6.793710708618164, "learning_rate": 1.192217527057791e-06, "loss": 0.5776, "step": 9530 }, { "epoch": 1.38, "grad_norm": 7.795595169067383, "learning_rate": 1.1920621936694721e-06, "loss": 0.5645, "step": 9531 }, { "epoch": 1.38, "grad_norm": 5.891526222229004, "learning_rate": 1.191906855469347e-06, "loss": 0.5145, "step": 9532 }, { "epoch": 1.38, "grad_norm": 6.149905681610107, "learning_rate": 1.1917515124613077e-06, "loss": 0.5711, "step": 9533 }, { "epoch": 1.38, "grad_norm": 6.375499248504639, "learning_rate": 1.1915961646492457e-06, "loss": 0.5429, "step": 9534 }, { "epoch": 1.38, "grad_norm": 6.481986999511719, "learning_rate": 1.191440812037053e-06, "loss": 0.5581, "step": 9535 }, { "epoch": 1.38, "grad_norm": 7.906643390655518, "learning_rate": 1.1912854546286218e-06, "loss": 0.5278, "step": 9536 }, { "epoch": 1.38, "grad_norm": 6.568152904510498, "learning_rate": 1.1911300924278442e-06, "loss": 0.5351, "step": 9537 }, { "epoch": 1.38, "grad_norm": 7.199069499969482, "learning_rate": 1.1909747254386127e-06, "loss": 0.5469, "step": 9538 }, { "epoch": 1.38, "grad_norm": 6.824443817138672, "learning_rate": 1.1908193536648202e-06, "loss": 0.5201, "step": 9539 }, { "epoch": 1.38, "grad_norm": 6.680647373199463, "learning_rate": 1.1906639771103584e-06, "loss": 0.5617, "step": 9540 }, { "epoch": 1.38, "grad_norm": 6.341238498687744, "learning_rate": 1.1905085957791206e-06, "loss": 0.4516, "step": 9541 }, { "epoch": 1.38, "grad_norm": 6.889520645141602, "learning_rate": 1.1903532096749996e-06, "loss": 0.5423, "step": 9542 }, { "epoch": 1.38, "grad_norm": 6.360406875610352, "learning_rate": 1.190197818801888e-06, "loss": 0.4845, "step": 9543 }, { "epoch": 1.38, "grad_norm": 7.0890045166015625, "learning_rate": 1.1900424231636793e-06, "loss": 0.561, "step": 9544 }, { "epoch": 1.38, "grad_norm": 6.821379661560059, "learning_rate": 1.1898870227642661e-06, "loss": 0.5278, "step": 9545 }, { "epoch": 1.39, "grad_norm": 5.986245155334473, "learning_rate": 1.1897316176075425e-06, "loss": 0.533, "step": 9546 }, { "epoch": 1.39, "grad_norm": 7.4168596267700195, "learning_rate": 1.1895762076974014e-06, "loss": 0.6411, "step": 9547 }, { "epoch": 1.39, "grad_norm": 7.1340484619140625, "learning_rate": 1.1894207930377365e-06, "loss": 0.5357, "step": 9548 }, { "epoch": 1.39, "grad_norm": 6.88755464553833, "learning_rate": 1.1892653736324412e-06, "loss": 0.4756, "step": 9549 }, { "epoch": 1.39, "grad_norm": 6.298286437988281, "learning_rate": 1.1891099494854096e-06, "loss": 0.5208, "step": 9550 }, { "epoch": 1.39, "grad_norm": 5.851644992828369, "learning_rate": 1.1889545206005353e-06, "loss": 0.5362, "step": 9551 }, { "epoch": 1.39, "grad_norm": 5.6193013191223145, "learning_rate": 1.188799086981713e-06, "loss": 0.5354, "step": 9552 }, { "epoch": 1.39, "grad_norm": 6.527441024780273, "learning_rate": 1.1886436486328358e-06, "loss": 0.533, "step": 9553 }, { "epoch": 1.39, "grad_norm": 6.004609107971191, "learning_rate": 1.188488205557799e-06, "loss": 0.5459, "step": 9554 }, { "epoch": 1.39, "grad_norm": 6.548900604248047, "learning_rate": 1.188332757760496e-06, "loss": 0.5746, "step": 9555 }, { "epoch": 1.39, "grad_norm": 6.331089019775391, "learning_rate": 1.1881773052448218e-06, "loss": 0.4889, "step": 9556 }, { "epoch": 1.39, "grad_norm": 6.147045612335205, "learning_rate": 1.1880218480146714e-06, "loss": 0.4873, "step": 9557 }, { "epoch": 1.39, "grad_norm": 6.808821201324463, "learning_rate": 1.1878663860739387e-06, "loss": 0.5214, "step": 9558 }, { "epoch": 1.39, "grad_norm": 6.96442985534668, "learning_rate": 1.187710919426519e-06, "loss": 0.6012, "step": 9559 }, { "epoch": 1.39, "grad_norm": 6.334407329559326, "learning_rate": 1.1875554480763073e-06, "loss": 0.5094, "step": 9560 }, { "epoch": 1.39, "grad_norm": 7.480635643005371, "learning_rate": 1.1873999720271988e-06, "loss": 0.5907, "step": 9561 }, { "epoch": 1.39, "grad_norm": 6.706788063049316, "learning_rate": 1.1872444912830881e-06, "loss": 0.5717, "step": 9562 }, { "epoch": 1.39, "grad_norm": 6.085970878601074, "learning_rate": 1.1870890058478712e-06, "loss": 0.5834, "step": 9563 }, { "epoch": 1.39, "grad_norm": 6.241243839263916, "learning_rate": 1.186933515725443e-06, "loss": 0.5278, "step": 9564 }, { "epoch": 1.39, "grad_norm": 6.675403118133545, "learning_rate": 1.1867780209196992e-06, "loss": 0.4734, "step": 9565 }, { "epoch": 1.39, "grad_norm": 6.211068630218506, "learning_rate": 1.1866225214345358e-06, "loss": 0.4498, "step": 9566 }, { "epoch": 1.39, "grad_norm": 6.86325740814209, "learning_rate": 1.1864670172738486e-06, "loss": 0.5441, "step": 9567 }, { "epoch": 1.39, "grad_norm": 6.7621355056762695, "learning_rate": 1.1863115084415329e-06, "loss": 0.5837, "step": 9568 }, { "epoch": 1.39, "grad_norm": 6.772923946380615, "learning_rate": 1.1861559949414852e-06, "loss": 0.5387, "step": 9569 }, { "epoch": 1.39, "grad_norm": 6.62505578994751, "learning_rate": 1.1860004767776014e-06, "loss": 0.5209, "step": 9570 }, { "epoch": 1.39, "grad_norm": 7.622366428375244, "learning_rate": 1.185844953953778e-06, "loss": 0.5542, "step": 9571 }, { "epoch": 1.39, "grad_norm": 6.3372015953063965, "learning_rate": 1.1856894264739112e-06, "loss": 0.5273, "step": 9572 }, { "epoch": 1.39, "grad_norm": 6.120761871337891, "learning_rate": 1.1855338943418977e-06, "loss": 0.4627, "step": 9573 }, { "epoch": 1.39, "grad_norm": 7.559615135192871, "learning_rate": 1.1853783575616342e-06, "loss": 0.5779, "step": 9574 }, { "epoch": 1.39, "grad_norm": 5.713396072387695, "learning_rate": 1.1852228161370165e-06, "loss": 0.5103, "step": 9575 }, { "epoch": 1.39, "grad_norm": 6.365321636199951, "learning_rate": 1.1850672700719425e-06, "loss": 0.5135, "step": 9576 }, { "epoch": 1.39, "grad_norm": 6.070625305175781, "learning_rate": 1.1849117193703083e-06, "loss": 0.5, "step": 9577 }, { "epoch": 1.39, "grad_norm": 5.932689666748047, "learning_rate": 1.1847561640360117e-06, "loss": 0.5168, "step": 9578 }, { "epoch": 1.39, "grad_norm": 6.002782344818115, "learning_rate": 1.1846006040729498e-06, "loss": 0.4803, "step": 9579 }, { "epoch": 1.39, "grad_norm": 6.067595958709717, "learning_rate": 1.1844450394850194e-06, "loss": 0.5419, "step": 9580 }, { "epoch": 1.39, "grad_norm": 5.797341346740723, "learning_rate": 1.1842894702761183e-06, "loss": 0.5194, "step": 9581 }, { "epoch": 1.39, "grad_norm": 6.6062912940979, "learning_rate": 1.1841338964501444e-06, "loss": 0.4765, "step": 9582 }, { "epoch": 1.39, "grad_norm": 6.573071002960205, "learning_rate": 1.1839783180109946e-06, "loss": 0.582, "step": 9583 }, { "epoch": 1.39, "grad_norm": 6.4136528968811035, "learning_rate": 1.1838227349625666e-06, "loss": 0.5687, "step": 9584 }, { "epoch": 1.39, "grad_norm": 6.088045597076416, "learning_rate": 1.1836671473087591e-06, "loss": 0.5386, "step": 9585 }, { "epoch": 1.39, "grad_norm": 6.351434707641602, "learning_rate": 1.1835115550534697e-06, "loss": 0.4909, "step": 9586 }, { "epoch": 1.39, "grad_norm": 6.972691059112549, "learning_rate": 1.1833559582005961e-06, "loss": 0.5232, "step": 9587 }, { "epoch": 1.39, "grad_norm": 6.646836280822754, "learning_rate": 1.183200356754037e-06, "loss": 0.4671, "step": 9588 }, { "epoch": 1.39, "grad_norm": 6.181203365325928, "learning_rate": 1.183044750717691e-06, "loss": 0.5636, "step": 9589 }, { "epoch": 1.39, "grad_norm": 6.58403205871582, "learning_rate": 1.1828891400954557e-06, "loss": 0.5543, "step": 9590 }, { "epoch": 1.39, "grad_norm": 5.766593933105469, "learning_rate": 1.1827335248912303e-06, "loss": 0.5029, "step": 9591 }, { "epoch": 1.39, "grad_norm": 6.775745391845703, "learning_rate": 1.1825779051089133e-06, "loss": 0.4979, "step": 9592 }, { "epoch": 1.39, "grad_norm": 6.54375696182251, "learning_rate": 1.1824222807524035e-06, "loss": 0.5707, "step": 9593 }, { "epoch": 1.39, "grad_norm": 8.140445709228516, "learning_rate": 1.1822666518256002e-06, "loss": 0.6068, "step": 9594 }, { "epoch": 1.39, "grad_norm": 6.7792582511901855, "learning_rate": 1.1821110183324022e-06, "loss": 0.6056, "step": 9595 }, { "epoch": 1.39, "grad_norm": 6.017039775848389, "learning_rate": 1.181955380276708e-06, "loss": 0.5123, "step": 9596 }, { "epoch": 1.39, "grad_norm": 6.604836463928223, "learning_rate": 1.1817997376624175e-06, "loss": 0.5884, "step": 9597 }, { "epoch": 1.39, "grad_norm": 6.859198570251465, "learning_rate": 1.1816440904934303e-06, "loss": 0.5674, "step": 9598 }, { "epoch": 1.39, "grad_norm": 6.530883312225342, "learning_rate": 1.1814884387736452e-06, "loss": 0.4874, "step": 9599 }, { "epoch": 1.39, "grad_norm": 6.590434551239014, "learning_rate": 1.1813327825069623e-06, "loss": 0.5303, "step": 9600 }, { "epoch": 1.39, "grad_norm": 6.865783214569092, "learning_rate": 1.1811771216972813e-06, "loss": 0.6201, "step": 9601 }, { "epoch": 1.39, "grad_norm": 6.248337268829346, "learning_rate": 1.1810214563485019e-06, "loss": 0.5415, "step": 9602 }, { "epoch": 1.39, "grad_norm": 6.958980083465576, "learning_rate": 1.1808657864645239e-06, "loss": 0.5311, "step": 9603 }, { "epoch": 1.39, "grad_norm": 6.000721454620361, "learning_rate": 1.1807101120492476e-06, "loss": 0.517, "step": 9604 }, { "epoch": 1.39, "grad_norm": 6.754013538360596, "learning_rate": 1.1805544331065726e-06, "loss": 0.5392, "step": 9605 }, { "epoch": 1.39, "grad_norm": 6.153004169464111, "learning_rate": 1.1803987496404003e-06, "loss": 0.5231, "step": 9606 }, { "epoch": 1.39, "grad_norm": 6.614119529724121, "learning_rate": 1.18024306165463e-06, "loss": 0.5353, "step": 9607 }, { "epoch": 1.39, "grad_norm": 6.396582126617432, "learning_rate": 1.1800873691531628e-06, "loss": 0.4909, "step": 9608 }, { "epoch": 1.39, "grad_norm": 7.536369323730469, "learning_rate": 1.1799316721398992e-06, "loss": 0.5794, "step": 9609 }, { "epoch": 1.39, "grad_norm": 6.763491153717041, "learning_rate": 1.17977597061874e-06, "loss": 0.5787, "step": 9610 }, { "epoch": 1.39, "grad_norm": 6.351274490356445, "learning_rate": 1.179620264593586e-06, "loss": 0.5026, "step": 9611 }, { "epoch": 1.39, "grad_norm": 7.177145481109619, "learning_rate": 1.1794645540683378e-06, "loss": 0.4932, "step": 9612 }, { "epoch": 1.39, "grad_norm": 6.253347396850586, "learning_rate": 1.179308839046897e-06, "loss": 0.5056, "step": 9613 }, { "epoch": 1.39, "grad_norm": 5.748734474182129, "learning_rate": 1.1791531195331646e-06, "loss": 0.4672, "step": 9614 }, { "epoch": 1.4, "grad_norm": 6.351013660430908, "learning_rate": 1.1789973955310418e-06, "loss": 0.5364, "step": 9615 }, { "epoch": 1.4, "grad_norm": 6.144836902618408, "learning_rate": 1.1788416670444302e-06, "loss": 0.4762, "step": 9616 }, { "epoch": 1.4, "grad_norm": 6.420409679412842, "learning_rate": 1.178685934077231e-06, "loss": 0.5398, "step": 9617 }, { "epoch": 1.4, "grad_norm": 6.1741838455200195, "learning_rate": 1.1785301966333463e-06, "loss": 0.5594, "step": 9618 }, { "epoch": 1.4, "grad_norm": 7.963984489440918, "learning_rate": 1.1783744547166776e-06, "loss": 0.561, "step": 9619 }, { "epoch": 1.4, "grad_norm": 7.1422905921936035, "learning_rate": 1.1782187083311267e-06, "loss": 0.5435, "step": 9620 }, { "epoch": 1.4, "grad_norm": 6.120262145996094, "learning_rate": 1.1780629574805957e-06, "loss": 0.4958, "step": 9621 }, { "epoch": 1.4, "grad_norm": 6.9391398429870605, "learning_rate": 1.1779072021689865e-06, "loss": 0.5435, "step": 9622 }, { "epoch": 1.4, "grad_norm": 6.251773834228516, "learning_rate": 1.1777514424002015e-06, "loss": 0.5336, "step": 9623 }, { "epoch": 1.4, "grad_norm": 6.624938011169434, "learning_rate": 1.1775956781781428e-06, "loss": 0.5758, "step": 9624 }, { "epoch": 1.4, "grad_norm": 7.166837692260742, "learning_rate": 1.177439909506713e-06, "loss": 0.5871, "step": 9625 }, { "epoch": 1.4, "grad_norm": 6.480938911437988, "learning_rate": 1.1772841363898147e-06, "loss": 0.5123, "step": 9626 }, { "epoch": 1.4, "grad_norm": 6.822872638702393, "learning_rate": 1.1771283588313502e-06, "loss": 0.5646, "step": 9627 }, { "epoch": 1.4, "grad_norm": 7.159464359283447, "learning_rate": 1.1769725768352227e-06, "loss": 0.5162, "step": 9628 }, { "epoch": 1.4, "grad_norm": 6.178745269775391, "learning_rate": 1.1768167904053347e-06, "loss": 0.5858, "step": 9629 }, { "epoch": 1.4, "grad_norm": 6.39432954788208, "learning_rate": 1.1766609995455896e-06, "loss": 0.531, "step": 9630 }, { "epoch": 1.4, "grad_norm": 5.523263454437256, "learning_rate": 1.17650520425989e-06, "loss": 0.5206, "step": 9631 }, { "epoch": 1.4, "grad_norm": 6.597973823547363, "learning_rate": 1.1763494045521392e-06, "loss": 0.5535, "step": 9632 }, { "epoch": 1.4, "grad_norm": 6.800389766693115, "learning_rate": 1.1761936004262407e-06, "loss": 0.6358, "step": 9633 }, { "epoch": 1.4, "grad_norm": 7.468108654022217, "learning_rate": 1.1760377918860977e-06, "loss": 0.5988, "step": 9634 }, { "epoch": 1.4, "grad_norm": 6.7291646003723145, "learning_rate": 1.175881978935614e-06, "loss": 0.5915, "step": 9635 }, { "epoch": 1.4, "grad_norm": 5.694855213165283, "learning_rate": 1.175726161578693e-06, "loss": 0.5373, "step": 9636 }, { "epoch": 1.4, "grad_norm": 5.600566387176514, "learning_rate": 1.1755703398192387e-06, "loss": 0.5257, "step": 9637 }, { "epoch": 1.4, "grad_norm": 6.672587871551514, "learning_rate": 1.1754145136611548e-06, "loss": 0.5154, "step": 9638 }, { "epoch": 1.4, "grad_norm": 5.8712544441223145, "learning_rate": 1.175258683108345e-06, "loss": 0.5448, "step": 9639 }, { "epoch": 1.4, "grad_norm": 6.027252674102783, "learning_rate": 1.175102848164714e-06, "loss": 0.6088, "step": 9640 }, { "epoch": 1.4, "grad_norm": 7.073585033416748, "learning_rate": 1.1749470088341655e-06, "loss": 0.5504, "step": 9641 }, { "epoch": 1.4, "grad_norm": 5.9778056144714355, "learning_rate": 1.174791165120604e-06, "loss": 0.5543, "step": 9642 }, { "epoch": 1.4, "grad_norm": 6.524316787719727, "learning_rate": 1.1746353170279337e-06, "loss": 0.5364, "step": 9643 }, { "epoch": 1.4, "grad_norm": 6.307277679443359, "learning_rate": 1.1744794645600592e-06, "loss": 0.5533, "step": 9644 }, { "epoch": 1.4, "grad_norm": 6.741336345672607, "learning_rate": 1.1743236077208855e-06, "loss": 0.51, "step": 9645 }, { "epoch": 1.4, "grad_norm": 6.77489709854126, "learning_rate": 1.174167746514317e-06, "loss": 0.5465, "step": 9646 }, { "epoch": 1.4, "grad_norm": 6.600705146789551, "learning_rate": 1.1740118809442586e-06, "loss": 0.5741, "step": 9647 }, { "epoch": 1.4, "grad_norm": 6.471181869506836, "learning_rate": 1.1738560110146152e-06, "loss": 0.5087, "step": 9648 }, { "epoch": 1.4, "grad_norm": 7.030767440795898, "learning_rate": 1.1737001367292917e-06, "loss": 0.6062, "step": 9649 }, { "epoch": 1.4, "grad_norm": 7.1974310874938965, "learning_rate": 1.173544258092194e-06, "loss": 0.5941, "step": 9650 }, { "epoch": 1.4, "grad_norm": 6.2205915451049805, "learning_rate": 1.1733883751072267e-06, "loss": 0.4878, "step": 9651 }, { "epoch": 1.4, "grad_norm": 6.812394618988037, "learning_rate": 1.1732324877782954e-06, "loss": 0.5299, "step": 9652 }, { "epoch": 1.4, "grad_norm": 6.729137420654297, "learning_rate": 1.1730765961093054e-06, "loss": 0.5659, "step": 9653 }, { "epoch": 1.4, "grad_norm": 6.483994007110596, "learning_rate": 1.1729207001041628e-06, "loss": 0.5221, "step": 9654 }, { "epoch": 1.4, "grad_norm": 5.981049060821533, "learning_rate": 1.1727647997667729e-06, "loss": 0.5217, "step": 9655 }, { "epoch": 1.4, "grad_norm": 6.827564239501953, "learning_rate": 1.1726088951010415e-06, "loss": 0.5522, "step": 9656 }, { "epoch": 1.4, "grad_norm": 6.730411052703857, "learning_rate": 1.1724529861108746e-06, "loss": 0.6022, "step": 9657 }, { "epoch": 1.4, "grad_norm": 6.1865057945251465, "learning_rate": 1.1722970728001788e-06, "loss": 0.4768, "step": 9658 }, { "epoch": 1.4, "grad_norm": 5.876435279846191, "learning_rate": 1.1721411551728598e-06, "loss": 0.5061, "step": 9659 }, { "epoch": 1.4, "grad_norm": 6.648624420166016, "learning_rate": 1.1719852332328236e-06, "loss": 0.4776, "step": 9660 }, { "epoch": 1.4, "grad_norm": 6.203718185424805, "learning_rate": 1.1718293069839767e-06, "loss": 0.5062, "step": 9661 }, { "epoch": 1.4, "grad_norm": 6.604988098144531, "learning_rate": 1.1716733764302257e-06, "loss": 0.5642, "step": 9662 }, { "epoch": 1.4, "grad_norm": 7.157228469848633, "learning_rate": 1.1715174415754773e-06, "loss": 0.5339, "step": 9663 }, { "epoch": 1.4, "grad_norm": 6.423772811889648, "learning_rate": 1.1713615024236383e-06, "loss": 0.6333, "step": 9664 }, { "epoch": 1.4, "grad_norm": 7.227513790130615, "learning_rate": 1.1712055589786152e-06, "loss": 0.5528, "step": 9665 }, { "epoch": 1.4, "grad_norm": 6.787863731384277, "learning_rate": 1.171049611244315e-06, "loss": 0.5095, "step": 9666 }, { "epoch": 1.4, "grad_norm": 6.134617328643799, "learning_rate": 1.1708936592246446e-06, "loss": 0.5211, "step": 9667 }, { "epoch": 1.4, "grad_norm": 6.819701671600342, "learning_rate": 1.1707377029235112e-06, "loss": 0.5277, "step": 9668 }, { "epoch": 1.4, "grad_norm": 6.225666046142578, "learning_rate": 1.170581742344822e-06, "loss": 0.5921, "step": 9669 }, { "epoch": 1.4, "grad_norm": 5.748737812042236, "learning_rate": 1.1704257774924843e-06, "loss": 0.4707, "step": 9670 }, { "epoch": 1.4, "grad_norm": 6.941135406494141, "learning_rate": 1.1702698083704062e-06, "loss": 0.643, "step": 9671 }, { "epoch": 1.4, "grad_norm": 6.179842948913574, "learning_rate": 1.1701138349824943e-06, "loss": 0.4989, "step": 9672 }, { "epoch": 1.4, "grad_norm": 7.199601173400879, "learning_rate": 1.1699578573326569e-06, "loss": 0.5303, "step": 9673 }, { "epoch": 1.4, "grad_norm": 6.268270969390869, "learning_rate": 1.1698018754248016e-06, "loss": 0.5678, "step": 9674 }, { "epoch": 1.4, "grad_norm": 6.001768589019775, "learning_rate": 1.1696458892628357e-06, "loss": 0.5167, "step": 9675 }, { "epoch": 1.4, "grad_norm": 6.7986578941345215, "learning_rate": 1.1694898988506684e-06, "loss": 0.5623, "step": 9676 }, { "epoch": 1.4, "grad_norm": 6.025575637817383, "learning_rate": 1.1693339041922065e-06, "loss": 0.5318, "step": 9677 }, { "epoch": 1.4, "grad_norm": 6.477719783782959, "learning_rate": 1.169177905291359e-06, "loss": 0.513, "step": 9678 }, { "epoch": 1.4, "grad_norm": 6.65821647644043, "learning_rate": 1.169021902152034e-06, "loss": 0.5795, "step": 9679 }, { "epoch": 1.4, "grad_norm": 6.362710952758789, "learning_rate": 1.1688658947781398e-06, "loss": 0.5346, "step": 9680 }, { "epoch": 1.4, "grad_norm": 6.267651557922363, "learning_rate": 1.168709883173585e-06, "loss": 0.5119, "step": 9681 }, { "epoch": 1.4, "grad_norm": 6.870031833648682, "learning_rate": 1.1685538673422784e-06, "loss": 0.5447, "step": 9682 }, { "epoch": 1.4, "grad_norm": 6.2351884841918945, "learning_rate": 1.1683978472881282e-06, "loss": 0.553, "step": 9683 }, { "epoch": 1.41, "grad_norm": 6.648645401000977, "learning_rate": 1.1682418230150436e-06, "loss": 0.6108, "step": 9684 }, { "epoch": 1.41, "grad_norm": 5.381399154663086, "learning_rate": 1.1680857945269335e-06, "loss": 0.4819, "step": 9685 }, { "epoch": 1.41, "grad_norm": 6.967221260070801, "learning_rate": 1.1679297618277074e-06, "loss": 0.5964, "step": 9686 }, { "epoch": 1.41, "grad_norm": 5.819156646728516, "learning_rate": 1.1677737249212735e-06, "loss": 0.5307, "step": 9687 }, { "epoch": 1.41, "grad_norm": 6.257544040679932, "learning_rate": 1.1676176838115416e-06, "loss": 0.5655, "step": 9688 }, { "epoch": 1.41, "grad_norm": 6.736258029937744, "learning_rate": 1.167461638502421e-06, "loss": 0.5329, "step": 9689 }, { "epoch": 1.41, "grad_norm": 6.007020950317383, "learning_rate": 1.1673055889978209e-06, "loss": 0.5125, "step": 9690 }, { "epoch": 1.41, "grad_norm": 7.2420806884765625, "learning_rate": 1.1671495353016513e-06, "loss": 0.5546, "step": 9691 }, { "epoch": 1.41, "grad_norm": 6.9176344871521, "learning_rate": 1.1669934774178218e-06, "loss": 0.5883, "step": 9692 }, { "epoch": 1.41, "grad_norm": 7.414059638977051, "learning_rate": 1.1668374153502423e-06, "loss": 0.6009, "step": 9693 }, { "epoch": 1.41, "grad_norm": 6.5330095291137695, "learning_rate": 1.166681349102822e-06, "loss": 0.525, "step": 9694 }, { "epoch": 1.41, "grad_norm": 5.959037780761719, "learning_rate": 1.1665252786794712e-06, "loss": 0.5066, "step": 9695 }, { "epoch": 1.41, "grad_norm": 6.659373760223389, "learning_rate": 1.1663692040841005e-06, "loss": 0.5433, "step": 9696 }, { "epoch": 1.41, "grad_norm": 5.928310394287109, "learning_rate": 1.1662131253206196e-06, "loss": 0.4906, "step": 9697 }, { "epoch": 1.41, "grad_norm": 6.050294399261475, "learning_rate": 1.1660570423929388e-06, "loss": 0.4919, "step": 9698 }, { "epoch": 1.41, "grad_norm": 6.3876214027404785, "learning_rate": 1.1659009553049688e-06, "loss": 0.5688, "step": 9699 }, { "epoch": 1.41, "grad_norm": 6.322335720062256, "learning_rate": 1.1657448640606198e-06, "loss": 0.5662, "step": 9700 }, { "epoch": 1.41, "grad_norm": 6.387384414672852, "learning_rate": 1.1655887686638026e-06, "loss": 0.531, "step": 9701 }, { "epoch": 1.41, "grad_norm": 6.308560848236084, "learning_rate": 1.1654326691184278e-06, "loss": 0.5148, "step": 9702 }, { "epoch": 1.41, "grad_norm": 6.458419322967529, "learning_rate": 1.1652765654284063e-06, "loss": 0.5353, "step": 9703 }, { "epoch": 1.41, "grad_norm": 6.307507514953613, "learning_rate": 1.165120457597649e-06, "loss": 0.5159, "step": 9704 }, { "epoch": 1.41, "grad_norm": 6.361331462860107, "learning_rate": 1.164964345630067e-06, "loss": 0.5169, "step": 9705 }, { "epoch": 1.41, "grad_norm": 6.670302867889404, "learning_rate": 1.164808229529571e-06, "loss": 0.5304, "step": 9706 }, { "epoch": 1.41, "grad_norm": 6.824772357940674, "learning_rate": 1.1646521093000731e-06, "loss": 0.592, "step": 9707 }, { "epoch": 1.41, "grad_norm": 6.586002826690674, "learning_rate": 1.164495984945484e-06, "loss": 0.5894, "step": 9708 }, { "epoch": 1.41, "grad_norm": 6.018703460693359, "learning_rate": 1.164339856469715e-06, "loss": 0.4472, "step": 9709 }, { "epoch": 1.41, "grad_norm": 6.194202423095703, "learning_rate": 1.1641837238766782e-06, "loss": 0.5149, "step": 9710 }, { "epoch": 1.41, "grad_norm": 6.6710357666015625, "learning_rate": 1.164027587170285e-06, "loss": 0.6731, "step": 9711 }, { "epoch": 1.41, "grad_norm": 6.892969608306885, "learning_rate": 1.163871446354447e-06, "loss": 0.52, "step": 9712 }, { "epoch": 1.41, "grad_norm": 5.7037672996521, "learning_rate": 1.163715301433076e-06, "loss": 0.5324, "step": 9713 }, { "epoch": 1.41, "grad_norm": 6.366218566894531, "learning_rate": 1.1635591524100845e-06, "loss": 0.5115, "step": 9714 }, { "epoch": 1.41, "grad_norm": 6.76076602935791, "learning_rate": 1.163402999289384e-06, "loss": 0.5444, "step": 9715 }, { "epoch": 1.41, "grad_norm": 6.843538761138916, "learning_rate": 1.1632468420748868e-06, "loss": 0.5269, "step": 9716 }, { "epoch": 1.41, "grad_norm": 7.0173659324646, "learning_rate": 1.1630906807705054e-06, "loss": 0.5288, "step": 9717 }, { "epoch": 1.41, "grad_norm": 6.698212146759033, "learning_rate": 1.1629345153801518e-06, "loss": 0.5007, "step": 9718 }, { "epoch": 1.41, "grad_norm": 6.712105751037598, "learning_rate": 1.1627783459077388e-06, "loss": 0.589, "step": 9719 }, { "epoch": 1.41, "grad_norm": 6.627536296844482, "learning_rate": 1.1626221723571786e-06, "loss": 0.4888, "step": 9720 }, { "epoch": 1.41, "grad_norm": 7.045216083526611, "learning_rate": 1.1624659947323843e-06, "loss": 0.5562, "step": 9721 }, { "epoch": 1.41, "grad_norm": 6.6755547523498535, "learning_rate": 1.1623098130372684e-06, "loss": 0.5423, "step": 9722 }, { "epoch": 1.41, "grad_norm": 6.505723476409912, "learning_rate": 1.162153627275744e-06, "loss": 0.5577, "step": 9723 }, { "epoch": 1.41, "grad_norm": 6.823606491088867, "learning_rate": 1.1619974374517237e-06, "loss": 0.5742, "step": 9724 }, { "epoch": 1.41, "grad_norm": 6.247791290283203, "learning_rate": 1.1618412435691208e-06, "loss": 0.5304, "step": 9725 }, { "epoch": 1.41, "grad_norm": 6.2656378746032715, "learning_rate": 1.1616850456318488e-06, "loss": 0.5072, "step": 9726 }, { "epoch": 1.41, "grad_norm": 6.905458927154541, "learning_rate": 1.1615288436438206e-06, "loss": 0.4822, "step": 9727 }, { "epoch": 1.41, "grad_norm": 6.430187225341797, "learning_rate": 1.1613726376089497e-06, "loss": 0.5442, "step": 9728 }, { "epoch": 1.41, "grad_norm": 6.438072681427002, "learning_rate": 1.1612164275311496e-06, "loss": 0.5474, "step": 9729 }, { "epoch": 1.41, "grad_norm": 6.028015613555908, "learning_rate": 1.1610602134143338e-06, "loss": 0.5052, "step": 9730 }, { "epoch": 1.41, "grad_norm": 6.017862796783447, "learning_rate": 1.1609039952624158e-06, "loss": 0.5541, "step": 9731 }, { "epoch": 1.41, "grad_norm": 6.079652309417725, "learning_rate": 1.1607477730793097e-06, "loss": 0.4761, "step": 9732 }, { "epoch": 1.41, "grad_norm": 7.842377662658691, "learning_rate": 1.1605915468689296e-06, "loss": 0.6308, "step": 9733 }, { "epoch": 1.41, "grad_norm": 5.906550407409668, "learning_rate": 1.1604353166351895e-06, "loss": 0.4728, "step": 9734 }, { "epoch": 1.41, "grad_norm": 6.741135597229004, "learning_rate": 1.1602790823820028e-06, "loss": 0.5243, "step": 9735 }, { "epoch": 1.41, "grad_norm": 5.9975433349609375, "learning_rate": 1.160122844113284e-06, "loss": 0.5157, "step": 9736 }, { "epoch": 1.41, "grad_norm": 7.835977554321289, "learning_rate": 1.1599666018329476e-06, "loss": 0.5857, "step": 9737 }, { "epoch": 1.41, "grad_norm": 6.581551551818848, "learning_rate": 1.159810355544908e-06, "loss": 0.5511, "step": 9738 }, { "epoch": 1.41, "grad_norm": 6.309464454650879, "learning_rate": 1.1596541052530794e-06, "loss": 0.5484, "step": 9739 }, { "epoch": 1.41, "grad_norm": 6.506902694702148, "learning_rate": 1.1594978509613768e-06, "loss": 0.5388, "step": 9740 }, { "epoch": 1.41, "grad_norm": 6.611265659332275, "learning_rate": 1.159341592673715e-06, "loss": 0.5784, "step": 9741 }, { "epoch": 1.41, "grad_norm": 6.190866470336914, "learning_rate": 1.1591853303940082e-06, "loss": 0.506, "step": 9742 }, { "epoch": 1.41, "grad_norm": 6.735915184020996, "learning_rate": 1.1590290641261715e-06, "loss": 0.5943, "step": 9743 }, { "epoch": 1.41, "grad_norm": 6.405464172363281, "learning_rate": 1.1588727938741201e-06, "loss": 0.5637, "step": 9744 }, { "epoch": 1.41, "grad_norm": 6.2783918380737305, "learning_rate": 1.158716519641769e-06, "loss": 0.5685, "step": 9745 }, { "epoch": 1.41, "grad_norm": 6.606926441192627, "learning_rate": 1.1585602414330334e-06, "loss": 0.5128, "step": 9746 }, { "epoch": 1.41, "grad_norm": 6.483755111694336, "learning_rate": 1.1584039592518283e-06, "loss": 0.5378, "step": 9747 }, { "epoch": 1.41, "grad_norm": 6.760802268981934, "learning_rate": 1.1582476731020696e-06, "loss": 0.5185, "step": 9748 }, { "epoch": 1.41, "grad_norm": 6.197296142578125, "learning_rate": 1.1580913829876727e-06, "loss": 0.5422, "step": 9749 }, { "epoch": 1.41, "grad_norm": 6.74798059463501, "learning_rate": 1.157935088912553e-06, "loss": 0.5799, "step": 9750 }, { "epoch": 1.41, "grad_norm": 6.702239990234375, "learning_rate": 1.157778790880626e-06, "loss": 0.5017, "step": 9751 }, { "epoch": 1.41, "grad_norm": 6.407398700714111, "learning_rate": 1.157622488895808e-06, "loss": 0.6109, "step": 9752 }, { "epoch": 1.42, "grad_norm": 5.778614521026611, "learning_rate": 1.1574661829620148e-06, "loss": 0.4909, "step": 9753 }, { "epoch": 1.42, "grad_norm": 6.422623157501221, "learning_rate": 1.157309873083162e-06, "loss": 0.5281, "step": 9754 }, { "epoch": 1.42, "grad_norm": 7.070698261260986, "learning_rate": 1.157153559263166e-06, "loss": 0.5473, "step": 9755 }, { "epoch": 1.42, "grad_norm": 6.151638507843018, "learning_rate": 1.156997241505943e-06, "loss": 0.4819, "step": 9756 }, { "epoch": 1.42, "grad_norm": 7.276637554168701, "learning_rate": 1.1568409198154092e-06, "loss": 0.604, "step": 9757 }, { "epoch": 1.42, "grad_norm": 5.881521224975586, "learning_rate": 1.156684594195481e-06, "loss": 0.47, "step": 9758 }, { "epoch": 1.42, "grad_norm": 6.2602219581604, "learning_rate": 1.1565282646500746e-06, "loss": 0.5327, "step": 9759 }, { "epoch": 1.42, "grad_norm": 6.551963806152344, "learning_rate": 1.1563719311831071e-06, "loss": 0.5762, "step": 9760 }, { "epoch": 1.42, "grad_norm": 6.708095550537109, "learning_rate": 1.156215593798495e-06, "loss": 0.5978, "step": 9761 }, { "epoch": 1.42, "grad_norm": 6.714251518249512, "learning_rate": 1.1560592525001552e-06, "loss": 0.5344, "step": 9762 }, { "epoch": 1.42, "grad_norm": 6.687692642211914, "learning_rate": 1.1559029072920039e-06, "loss": 0.6092, "step": 9763 }, { "epoch": 1.42, "grad_norm": 6.681976318359375, "learning_rate": 1.1557465581779588e-06, "loss": 0.5219, "step": 9764 }, { "epoch": 1.42, "grad_norm": 7.298851013183594, "learning_rate": 1.1555902051619368e-06, "loss": 0.5688, "step": 9765 }, { "epoch": 1.42, "grad_norm": 7.34155797958374, "learning_rate": 1.155433848247855e-06, "loss": 0.5063, "step": 9766 }, { "epoch": 1.42, "grad_norm": 7.128087997436523, "learning_rate": 1.1552774874396306e-06, "loss": 0.6183, "step": 9767 }, { "epoch": 1.42, "grad_norm": 6.502185821533203, "learning_rate": 1.155121122741181e-06, "loss": 0.5642, "step": 9768 }, { "epoch": 1.42, "grad_norm": 6.514650344848633, "learning_rate": 1.154964754156424e-06, "loss": 0.5688, "step": 9769 }, { "epoch": 1.42, "grad_norm": 6.351072788238525, "learning_rate": 1.1548083816892766e-06, "loss": 0.5051, "step": 9770 }, { "epoch": 1.42, "grad_norm": 6.5463361740112305, "learning_rate": 1.154652005343657e-06, "loss": 0.5806, "step": 9771 }, { "epoch": 1.42, "grad_norm": 6.474451065063477, "learning_rate": 1.1544956251234826e-06, "loss": 0.4394, "step": 9772 }, { "epoch": 1.42, "grad_norm": 6.941497325897217, "learning_rate": 1.154339241032671e-06, "loss": 0.5339, "step": 9773 }, { "epoch": 1.42, "grad_norm": 6.650030612945557, "learning_rate": 1.1541828530751406e-06, "loss": 0.5476, "step": 9774 }, { "epoch": 1.42, "grad_norm": 6.270461082458496, "learning_rate": 1.1540264612548096e-06, "loss": 0.5106, "step": 9775 }, { "epoch": 1.42, "grad_norm": 7.0402021408081055, "learning_rate": 1.1538700655755958e-06, "loss": 0.5499, "step": 9776 }, { "epoch": 1.42, "grad_norm": 6.5692572593688965, "learning_rate": 1.1537136660414175e-06, "loss": 0.5321, "step": 9777 }, { "epoch": 1.42, "grad_norm": 6.635631561279297, "learning_rate": 1.1535572626561929e-06, "loss": 0.5586, "step": 9778 }, { "epoch": 1.42, "grad_norm": 6.370050430297852, "learning_rate": 1.1534008554238408e-06, "loss": 0.5484, "step": 9779 }, { "epoch": 1.42, "grad_norm": 6.721899032592773, "learning_rate": 1.1532444443482793e-06, "loss": 0.5374, "step": 9780 }, { "epoch": 1.42, "grad_norm": 6.981201171875, "learning_rate": 1.1530880294334275e-06, "loss": 0.5791, "step": 9781 }, { "epoch": 1.42, "grad_norm": 6.0932393074035645, "learning_rate": 1.1529316106832038e-06, "loss": 0.5768, "step": 9782 }, { "epoch": 1.42, "grad_norm": 5.999748229980469, "learning_rate": 1.1527751881015273e-06, "loss": 0.5623, "step": 9783 }, { "epoch": 1.42, "grad_norm": 6.026662826538086, "learning_rate": 1.1526187616923162e-06, "loss": 0.545, "step": 9784 }, { "epoch": 1.42, "grad_norm": 6.272274017333984, "learning_rate": 1.1524623314594904e-06, "loss": 0.539, "step": 9785 }, { "epoch": 1.42, "grad_norm": 6.294705867767334, "learning_rate": 1.1523058974069683e-06, "loss": 0.5515, "step": 9786 }, { "epoch": 1.42, "grad_norm": 6.93252420425415, "learning_rate": 1.1521494595386696e-06, "loss": 0.5655, "step": 9787 }, { "epoch": 1.42, "grad_norm": 6.310184478759766, "learning_rate": 1.1519930178585133e-06, "loss": 0.576, "step": 9788 }, { "epoch": 1.42, "grad_norm": 6.2368879318237305, "learning_rate": 1.151836572370419e-06, "loss": 0.5888, "step": 9789 }, { "epoch": 1.42, "grad_norm": 6.177506446838379, "learning_rate": 1.1516801230783062e-06, "loss": 0.5342, "step": 9790 }, { "epoch": 1.42, "grad_norm": 6.37153959274292, "learning_rate": 1.1515236699860943e-06, "loss": 0.5541, "step": 9791 }, { "epoch": 1.42, "grad_norm": 7.338692665100098, "learning_rate": 1.151367213097703e-06, "loss": 0.5428, "step": 9792 }, { "epoch": 1.42, "grad_norm": 6.302387714385986, "learning_rate": 1.151210752417052e-06, "loss": 0.5434, "step": 9793 }, { "epoch": 1.42, "grad_norm": 6.097578525543213, "learning_rate": 1.1510542879480613e-06, "loss": 0.556, "step": 9794 }, { "epoch": 1.42, "grad_norm": 6.500289440155029, "learning_rate": 1.150897819694651e-06, "loss": 0.5235, "step": 9795 }, { "epoch": 1.42, "grad_norm": 5.413481712341309, "learning_rate": 1.1507413476607408e-06, "loss": 0.4556, "step": 9796 }, { "epoch": 1.42, "grad_norm": 5.765619277954102, "learning_rate": 1.1505848718502514e-06, "loss": 0.5313, "step": 9797 }, { "epoch": 1.42, "grad_norm": 6.53684139251709, "learning_rate": 1.1504283922671026e-06, "loss": 0.5143, "step": 9798 }, { "epoch": 1.42, "grad_norm": 6.591603755950928, "learning_rate": 1.1502719089152146e-06, "loss": 0.5908, "step": 9799 }, { "epoch": 1.42, "grad_norm": 7.8351054191589355, "learning_rate": 1.150115421798508e-06, "loss": 0.5456, "step": 9800 }, { "epoch": 1.42, "grad_norm": 6.24168586730957, "learning_rate": 1.1499589309209038e-06, "loss": 0.5355, "step": 9801 }, { "epoch": 1.42, "grad_norm": 8.04647159576416, "learning_rate": 1.149802436286322e-06, "loss": 0.6725, "step": 9802 }, { "epoch": 1.42, "grad_norm": 6.446549892425537, "learning_rate": 1.149645937898684e-06, "loss": 0.5497, "step": 9803 }, { "epoch": 1.42, "grad_norm": 6.807458400726318, "learning_rate": 1.1494894357619098e-06, "loss": 0.5277, "step": 9804 }, { "epoch": 1.42, "grad_norm": 5.948081016540527, "learning_rate": 1.1493329298799208e-06, "loss": 0.4919, "step": 9805 }, { "epoch": 1.42, "grad_norm": 7.328028678894043, "learning_rate": 1.1491764202566378e-06, "loss": 0.5604, "step": 9806 }, { "epoch": 1.42, "grad_norm": 6.503210067749023, "learning_rate": 1.149019906895982e-06, "loss": 0.5452, "step": 9807 }, { "epoch": 1.42, "grad_norm": 6.637442111968994, "learning_rate": 1.1488633898018746e-06, "loss": 0.587, "step": 9808 }, { "epoch": 1.42, "grad_norm": 6.1567606925964355, "learning_rate": 1.1487068689782367e-06, "loss": 0.4708, "step": 9809 }, { "epoch": 1.42, "grad_norm": 7.072231769561768, "learning_rate": 1.14855034442899e-06, "loss": 0.513, "step": 9810 }, { "epoch": 1.42, "grad_norm": 7.077748775482178, "learning_rate": 1.148393816158056e-06, "loss": 0.5442, "step": 9811 }, { "epoch": 1.42, "grad_norm": 6.571269512176514, "learning_rate": 1.1482372841693558e-06, "loss": 0.4784, "step": 9812 }, { "epoch": 1.42, "grad_norm": 6.865604877471924, "learning_rate": 1.1480807484668112e-06, "loss": 0.5806, "step": 9813 }, { "epoch": 1.42, "grad_norm": 6.11481237411499, "learning_rate": 1.147924209054344e-06, "loss": 0.579, "step": 9814 }, { "epoch": 1.42, "grad_norm": 6.069549560546875, "learning_rate": 1.147767665935876e-06, "loss": 0.5675, "step": 9815 }, { "epoch": 1.42, "grad_norm": 6.0432233810424805, "learning_rate": 1.1476111191153297e-06, "loss": 0.4759, "step": 9816 }, { "epoch": 1.42, "grad_norm": 6.688717842102051, "learning_rate": 1.1474545685966265e-06, "loss": 0.571, "step": 9817 }, { "epoch": 1.42, "grad_norm": 6.105648994445801, "learning_rate": 1.1472980143836886e-06, "loss": 0.4623, "step": 9818 }, { "epoch": 1.42, "grad_norm": 6.573068141937256, "learning_rate": 1.1471414564804382e-06, "loss": 0.519, "step": 9819 }, { "epoch": 1.42, "grad_norm": 5.943517684936523, "learning_rate": 1.146984894890798e-06, "loss": 0.48, "step": 9820 }, { "epoch": 1.42, "grad_norm": 6.050318241119385, "learning_rate": 1.1468283296186895e-06, "loss": 0.5664, "step": 9821 }, { "epoch": 1.43, "grad_norm": 6.250467300415039, "learning_rate": 1.1466717606680363e-06, "loss": 0.5891, "step": 9822 }, { "epoch": 1.43, "grad_norm": 6.470656394958496, "learning_rate": 1.1465151880427601e-06, "loss": 0.5357, "step": 9823 }, { "epoch": 1.43, "grad_norm": 7.14705753326416, "learning_rate": 1.146358611746784e-06, "loss": 0.5181, "step": 9824 }, { "epoch": 1.43, "grad_norm": 5.982120037078857, "learning_rate": 1.1462020317840308e-06, "loss": 0.5789, "step": 9825 }, { "epoch": 1.43, "grad_norm": 6.9141764640808105, "learning_rate": 1.1460454481584234e-06, "loss": 0.5275, "step": 9826 }, { "epoch": 1.43, "grad_norm": 6.545143127441406, "learning_rate": 1.1458888608738845e-06, "loss": 0.5861, "step": 9827 }, { "epoch": 1.43, "grad_norm": 6.096268653869629, "learning_rate": 1.145732269934337e-06, "loss": 0.5608, "step": 9828 }, { "epoch": 1.43, "grad_norm": 6.3440446853637695, "learning_rate": 1.1455756753437044e-06, "loss": 0.489, "step": 9829 }, { "epoch": 1.43, "grad_norm": 6.069441795349121, "learning_rate": 1.1454190771059098e-06, "loss": 0.5324, "step": 9830 }, { "epoch": 1.43, "grad_norm": 6.380812168121338, "learning_rate": 1.1452624752248767e-06, "loss": 0.5378, "step": 9831 }, { "epoch": 1.43, "grad_norm": 6.239450931549072, "learning_rate": 1.1451058697045282e-06, "loss": 0.5643, "step": 9832 }, { "epoch": 1.43, "grad_norm": 7.248030662536621, "learning_rate": 1.1449492605487878e-06, "loss": 0.4796, "step": 9833 }, { "epoch": 1.43, "grad_norm": 6.800642013549805, "learning_rate": 1.1447926477615791e-06, "loss": 0.5331, "step": 9834 }, { "epoch": 1.43, "grad_norm": 7.0517497062683105, "learning_rate": 1.1446360313468259e-06, "loss": 0.5472, "step": 9835 }, { "epoch": 1.43, "grad_norm": 6.341695308685303, "learning_rate": 1.144479411308452e-06, "loss": 0.4776, "step": 9836 }, { "epoch": 1.43, "grad_norm": 6.695750713348389, "learning_rate": 1.1443227876503813e-06, "loss": 0.5532, "step": 9837 }, { "epoch": 1.43, "grad_norm": 6.454346656799316, "learning_rate": 1.1441661603765375e-06, "loss": 0.4794, "step": 9838 }, { "epoch": 1.43, "grad_norm": 6.411605358123779, "learning_rate": 1.1440095294908451e-06, "loss": 0.5381, "step": 9839 }, { "epoch": 1.43, "grad_norm": 6.898112773895264, "learning_rate": 1.143852894997228e-06, "loss": 0.6223, "step": 9840 }, { "epoch": 1.43, "grad_norm": 6.383557319641113, "learning_rate": 1.14369625689961e-06, "loss": 0.4683, "step": 9841 }, { "epoch": 1.43, "grad_norm": 6.310441493988037, "learning_rate": 1.1435396152019157e-06, "loss": 0.5496, "step": 9842 }, { "epoch": 1.43, "grad_norm": 6.028039932250977, "learning_rate": 1.1433829699080698e-06, "loss": 0.4997, "step": 9843 }, { "epoch": 1.43, "grad_norm": 6.598404884338379, "learning_rate": 1.1432263210219964e-06, "loss": 0.5469, "step": 9844 }, { "epoch": 1.43, "grad_norm": 6.135117530822754, "learning_rate": 1.1430696685476201e-06, "loss": 0.5182, "step": 9845 }, { "epoch": 1.43, "grad_norm": 6.814521312713623, "learning_rate": 1.1429130124888664e-06, "loss": 0.5543, "step": 9846 }, { "epoch": 1.43, "grad_norm": 6.950992584228516, "learning_rate": 1.142756352849659e-06, "loss": 0.5295, "step": 9847 }, { "epoch": 1.43, "grad_norm": 6.5750932693481445, "learning_rate": 1.142599689633923e-06, "loss": 0.6164, "step": 9848 }, { "epoch": 1.43, "grad_norm": 6.288128852844238, "learning_rate": 1.1424430228455835e-06, "loss": 0.5345, "step": 9849 }, { "epoch": 1.43, "grad_norm": 6.3027777671813965, "learning_rate": 1.1422863524885653e-06, "loss": 0.5356, "step": 9850 }, { "epoch": 1.43, "grad_norm": 6.046282768249512, "learning_rate": 1.142129678566794e-06, "loss": 0.5466, "step": 9851 }, { "epoch": 1.43, "grad_norm": 7.096277713775635, "learning_rate": 1.1419730010841947e-06, "loss": 0.4344, "step": 9852 }, { "epoch": 1.43, "grad_norm": 6.474952220916748, "learning_rate": 1.1418163200446921e-06, "loss": 0.5259, "step": 9853 }, { "epoch": 1.43, "grad_norm": 6.651590347290039, "learning_rate": 1.1416596354522123e-06, "loss": 0.5617, "step": 9854 }, { "epoch": 1.43, "grad_norm": 6.523551940917969, "learning_rate": 1.1415029473106805e-06, "loss": 0.5094, "step": 9855 }, { "epoch": 1.43, "grad_norm": 6.358683109283447, "learning_rate": 1.1413462556240223e-06, "loss": 0.5495, "step": 9856 }, { "epoch": 1.43, "grad_norm": 7.01971435546875, "learning_rate": 1.1411895603961632e-06, "loss": 0.563, "step": 9857 }, { "epoch": 1.43, "grad_norm": 7.592926979064941, "learning_rate": 1.1410328616310292e-06, "loss": 0.5538, "step": 9858 }, { "epoch": 1.43, "grad_norm": 5.948081970214844, "learning_rate": 1.1408761593325462e-06, "loss": 0.4906, "step": 9859 }, { "epoch": 1.43, "grad_norm": 6.599699020385742, "learning_rate": 1.1407194535046397e-06, "loss": 0.4875, "step": 9860 }, { "epoch": 1.43, "grad_norm": 7.119382858276367, "learning_rate": 1.1405627441512357e-06, "loss": 0.5721, "step": 9861 }, { "epoch": 1.43, "grad_norm": 5.4148383140563965, "learning_rate": 1.140406031276261e-06, "loss": 0.5, "step": 9862 }, { "epoch": 1.43, "grad_norm": 7.082543849945068, "learning_rate": 1.140249314883641e-06, "loss": 0.6099, "step": 9863 }, { "epoch": 1.43, "grad_norm": 7.517477989196777, "learning_rate": 1.1400925949773023e-06, "loss": 0.5527, "step": 9864 }, { "epoch": 1.43, "grad_norm": 6.416750431060791, "learning_rate": 1.1399358715611713e-06, "loss": 0.5171, "step": 9865 }, { "epoch": 1.43, "grad_norm": 6.5384697914123535, "learning_rate": 1.1397791446391745e-06, "loss": 0.5885, "step": 9866 }, { "epoch": 1.43, "grad_norm": 6.14700984954834, "learning_rate": 1.1396224142152383e-06, "loss": 0.4977, "step": 9867 }, { "epoch": 1.43, "grad_norm": 6.424787521362305, "learning_rate": 1.1394656802932897e-06, "loss": 0.4585, "step": 9868 }, { "epoch": 1.43, "grad_norm": 5.858150959014893, "learning_rate": 1.1393089428772547e-06, "loss": 0.5134, "step": 9869 }, { "epoch": 1.43, "grad_norm": 7.290634632110596, "learning_rate": 1.1391522019710604e-06, "loss": 0.5674, "step": 9870 }, { "epoch": 1.43, "grad_norm": 5.813167572021484, "learning_rate": 1.138995457578634e-06, "loss": 0.5161, "step": 9871 }, { "epoch": 1.43, "grad_norm": 6.538531303405762, "learning_rate": 1.138838709703902e-06, "loss": 0.5896, "step": 9872 }, { "epoch": 1.43, "grad_norm": 6.336010456085205, "learning_rate": 1.1386819583507921e-06, "loss": 0.5685, "step": 9873 }, { "epoch": 1.43, "grad_norm": 7.026656150817871, "learning_rate": 1.1385252035232311e-06, "loss": 0.5555, "step": 9874 }, { "epoch": 1.43, "grad_norm": 6.142334461212158, "learning_rate": 1.1383684452251458e-06, "loss": 0.5281, "step": 9875 }, { "epoch": 1.43, "grad_norm": 6.065049171447754, "learning_rate": 1.1382116834604644e-06, "loss": 0.5127, "step": 9876 }, { "epoch": 1.43, "grad_norm": 6.603073596954346, "learning_rate": 1.1380549182331133e-06, "loss": 0.5828, "step": 9877 }, { "epoch": 1.43, "grad_norm": 7.954566955566406, "learning_rate": 1.1378981495470205e-06, "loss": 0.5763, "step": 9878 }, { "epoch": 1.43, "grad_norm": 6.3083391189575195, "learning_rate": 1.137741377406114e-06, "loss": 0.5106, "step": 9879 }, { "epoch": 1.43, "grad_norm": 6.3658223152160645, "learning_rate": 1.1375846018143212e-06, "loss": 0.5326, "step": 9880 }, { "epoch": 1.43, "grad_norm": 5.9079976081848145, "learning_rate": 1.1374278227755697e-06, "loss": 0.4805, "step": 9881 }, { "epoch": 1.43, "grad_norm": 7.397467136383057, "learning_rate": 1.1372710402937871e-06, "loss": 0.6379, "step": 9882 }, { "epoch": 1.43, "grad_norm": 5.538234233856201, "learning_rate": 1.1371142543729017e-06, "loss": 0.5137, "step": 9883 }, { "epoch": 1.43, "grad_norm": 6.926553249359131, "learning_rate": 1.1369574650168416e-06, "loss": 0.5156, "step": 9884 }, { "epoch": 1.43, "grad_norm": 6.751585006713867, "learning_rate": 1.1368006722295347e-06, "loss": 0.6305, "step": 9885 }, { "epoch": 1.43, "grad_norm": 5.962057113647461, "learning_rate": 1.136643876014909e-06, "loss": 0.5148, "step": 9886 }, { "epoch": 1.43, "grad_norm": 7.019792079925537, "learning_rate": 1.1364870763768934e-06, "loss": 0.7305, "step": 9887 }, { "epoch": 1.43, "grad_norm": 6.248307228088379, "learning_rate": 1.1363302733194157e-06, "loss": 0.5409, "step": 9888 }, { "epoch": 1.43, "grad_norm": 7.051721096038818, "learning_rate": 1.1361734668464046e-06, "loss": 0.5339, "step": 9889 }, { "epoch": 1.43, "grad_norm": 6.494656085968018, "learning_rate": 1.1360166569617884e-06, "loss": 0.5754, "step": 9890 }, { "epoch": 1.44, "grad_norm": 6.533790588378906, "learning_rate": 1.135859843669496e-06, "loss": 0.5231, "step": 9891 }, { "epoch": 1.44, "grad_norm": 5.820913791656494, "learning_rate": 1.1357030269734557e-06, "loss": 0.5364, "step": 9892 }, { "epoch": 1.44, "grad_norm": 7.519674777984619, "learning_rate": 1.1355462068775967e-06, "loss": 0.6484, "step": 9893 }, { "epoch": 1.44, "grad_norm": 6.8233184814453125, "learning_rate": 1.1353893833858482e-06, "loss": 0.5873, "step": 9894 }, { "epoch": 1.44, "grad_norm": 6.557351112365723, "learning_rate": 1.1352325565021382e-06, "loss": 0.5542, "step": 9895 }, { "epoch": 1.44, "grad_norm": 6.393574237823486, "learning_rate": 1.1350757262303961e-06, "loss": 0.5168, "step": 9896 }, { "epoch": 1.44, "grad_norm": 6.599887847900391, "learning_rate": 1.1349188925745514e-06, "loss": 0.5193, "step": 9897 }, { "epoch": 1.44, "grad_norm": 6.3712286949157715, "learning_rate": 1.134762055538533e-06, "loss": 0.5482, "step": 9898 }, { "epoch": 1.44, "grad_norm": 6.641261577606201, "learning_rate": 1.1346052151262702e-06, "loss": 0.519, "step": 9899 }, { "epoch": 1.44, "grad_norm": 5.634506702423096, "learning_rate": 1.1344483713416924e-06, "loss": 0.4902, "step": 9900 }, { "epoch": 1.44, "grad_norm": 5.570167541503906, "learning_rate": 1.1342915241887292e-06, "loss": 0.504, "step": 9901 }, { "epoch": 1.44, "grad_norm": 6.848146915435791, "learning_rate": 1.1341346736713102e-06, "loss": 0.5348, "step": 9902 }, { "epoch": 1.44, "grad_norm": 7.578258991241455, "learning_rate": 1.133977819793365e-06, "loss": 0.6908, "step": 9903 }, { "epoch": 1.44, "grad_norm": 5.719189643859863, "learning_rate": 1.1338209625588226e-06, "loss": 0.5107, "step": 9904 }, { "epoch": 1.44, "grad_norm": 5.722665309906006, "learning_rate": 1.1336641019716139e-06, "loss": 0.4906, "step": 9905 }, { "epoch": 1.44, "grad_norm": 6.311616897583008, "learning_rate": 1.133507238035668e-06, "loss": 0.5241, "step": 9906 }, { "epoch": 1.44, "grad_norm": 6.167381286621094, "learning_rate": 1.1333503707549156e-06, "loss": 0.6126, "step": 9907 }, { "epoch": 1.44, "grad_norm": 6.271237850189209, "learning_rate": 1.1331935001332863e-06, "loss": 0.5035, "step": 9908 }, { "epoch": 1.44, "grad_norm": 6.218276500701904, "learning_rate": 1.13303662617471e-06, "loss": 0.5608, "step": 9909 }, { "epoch": 1.44, "grad_norm": 6.279354572296143, "learning_rate": 1.1328797488831172e-06, "loss": 0.5328, "step": 9910 }, { "epoch": 1.44, "grad_norm": 6.358423709869385, "learning_rate": 1.1327228682624383e-06, "loss": 0.4681, "step": 9911 }, { "epoch": 1.44, "grad_norm": 7.461533546447754, "learning_rate": 1.1325659843166036e-06, "loss": 0.6024, "step": 9912 }, { "epoch": 1.44, "grad_norm": 7.245505332946777, "learning_rate": 1.1324090970495434e-06, "loss": 0.5887, "step": 9913 }, { "epoch": 1.44, "grad_norm": 6.031066417694092, "learning_rate": 1.1322522064651888e-06, "loss": 0.554, "step": 9914 }, { "epoch": 1.44, "grad_norm": 6.3255462646484375, "learning_rate": 1.13209531256747e-06, "loss": 0.5036, "step": 9915 }, { "epoch": 1.44, "grad_norm": 6.381364822387695, "learning_rate": 1.1319384153603175e-06, "loss": 0.5282, "step": 9916 }, { "epoch": 1.44, "grad_norm": 6.657992839813232, "learning_rate": 1.1317815148476625e-06, "loss": 0.5178, "step": 9917 }, { "epoch": 1.44, "grad_norm": 7.806675434112549, "learning_rate": 1.1316246110334355e-06, "loss": 0.5888, "step": 9918 }, { "epoch": 1.44, "grad_norm": 6.228683948516846, "learning_rate": 1.1314677039215682e-06, "loss": 0.5414, "step": 9919 }, { "epoch": 1.44, "grad_norm": 7.251908302307129, "learning_rate": 1.1313107935159906e-06, "loss": 0.5182, "step": 9920 }, { "epoch": 1.44, "grad_norm": 6.0481462478637695, "learning_rate": 1.131153879820635e-06, "loss": 0.5356, "step": 9921 }, { "epoch": 1.44, "grad_norm": 6.7672929763793945, "learning_rate": 1.130996962839432e-06, "loss": 0.5524, "step": 9922 }, { "epoch": 1.44, "grad_norm": 6.405246257781982, "learning_rate": 1.1308400425763128e-06, "loss": 0.5186, "step": 9923 }, { "epoch": 1.44, "grad_norm": 6.791097164154053, "learning_rate": 1.130683119035209e-06, "loss": 0.569, "step": 9924 }, { "epoch": 1.44, "grad_norm": 8.149731636047363, "learning_rate": 1.1305261922200517e-06, "loss": 0.5477, "step": 9925 }, { "epoch": 1.44, "grad_norm": 5.631518363952637, "learning_rate": 1.130369262134773e-06, "loss": 0.4125, "step": 9926 }, { "epoch": 1.44, "grad_norm": 5.7815470695495605, "learning_rate": 1.1302123287833041e-06, "loss": 0.5135, "step": 9927 }, { "epoch": 1.44, "grad_norm": 5.757076263427734, "learning_rate": 1.1300553921695768e-06, "loss": 0.4669, "step": 9928 }, { "epoch": 1.44, "grad_norm": 6.6899824142456055, "learning_rate": 1.1298984522975232e-06, "loss": 0.5046, "step": 9929 }, { "epoch": 1.44, "grad_norm": 6.023127555847168, "learning_rate": 1.129741509171075e-06, "loss": 0.4715, "step": 9930 }, { "epoch": 1.44, "grad_norm": 7.344395160675049, "learning_rate": 1.1295845627941638e-06, "loss": 0.6182, "step": 9931 }, { "epoch": 1.44, "grad_norm": 5.970120906829834, "learning_rate": 1.129427613170722e-06, "loss": 0.4994, "step": 9932 }, { "epoch": 1.44, "grad_norm": 5.73195219039917, "learning_rate": 1.1292706603046818e-06, "loss": 0.525, "step": 9933 }, { "epoch": 1.44, "grad_norm": 6.079349517822266, "learning_rate": 1.1291137041999752e-06, "loss": 0.4783, "step": 9934 }, { "epoch": 1.44, "grad_norm": 6.219986438751221, "learning_rate": 1.1289567448605346e-06, "loss": 0.5377, "step": 9935 }, { "epoch": 1.44, "grad_norm": 6.6202592849731445, "learning_rate": 1.1287997822902923e-06, "loss": 0.5769, "step": 9936 }, { "epoch": 1.44, "grad_norm": 5.838213920593262, "learning_rate": 1.1286428164931807e-06, "loss": 0.5368, "step": 9937 }, { "epoch": 1.44, "grad_norm": 6.116337776184082, "learning_rate": 1.1284858474731322e-06, "loss": 0.5622, "step": 9938 }, { "epoch": 1.44, "grad_norm": 7.171032428741455, "learning_rate": 1.1283288752340796e-06, "loss": 0.5592, "step": 9939 }, { "epoch": 1.44, "grad_norm": 6.041110515594482, "learning_rate": 1.1281718997799556e-06, "loss": 0.5326, "step": 9940 }, { "epoch": 1.44, "grad_norm": 6.492065906524658, "learning_rate": 1.128014921114693e-06, "loss": 0.5558, "step": 9941 }, { "epoch": 1.44, "grad_norm": 5.911303520202637, "learning_rate": 1.1278579392422246e-06, "loss": 0.4349, "step": 9942 }, { "epoch": 1.44, "grad_norm": 7.169015884399414, "learning_rate": 1.1277009541664834e-06, "loss": 0.5865, "step": 9943 }, { "epoch": 1.44, "grad_norm": 6.287045955657959, "learning_rate": 1.127543965891402e-06, "loss": 0.4751, "step": 9944 }, { "epoch": 1.44, "grad_norm": 6.867257595062256, "learning_rate": 1.1273869744209137e-06, "loss": 0.5203, "step": 9945 }, { "epoch": 1.44, "grad_norm": 6.841338634490967, "learning_rate": 1.1272299797589517e-06, "loss": 0.5396, "step": 9946 }, { "epoch": 1.44, "grad_norm": 6.7295942306518555, "learning_rate": 1.1270729819094496e-06, "loss": 0.5724, "step": 9947 }, { "epoch": 1.44, "grad_norm": 6.242347717285156, "learning_rate": 1.12691598087634e-06, "loss": 0.4685, "step": 9948 }, { "epoch": 1.44, "grad_norm": 7.116365909576416, "learning_rate": 1.126758976663557e-06, "loss": 0.5872, "step": 9949 }, { "epoch": 1.44, "grad_norm": 6.894577503204346, "learning_rate": 1.126601969275034e-06, "loss": 0.5909, "step": 9950 }, { "epoch": 1.44, "grad_norm": 5.734568119049072, "learning_rate": 1.1264449587147041e-06, "loss": 0.4967, "step": 9951 }, { "epoch": 1.44, "grad_norm": 6.57424783706665, "learning_rate": 1.1262879449865012e-06, "loss": 0.5154, "step": 9952 }, { "epoch": 1.44, "grad_norm": 6.531941890716553, "learning_rate": 1.126130928094359e-06, "loss": 0.5719, "step": 9953 }, { "epoch": 1.44, "grad_norm": 7.474276065826416, "learning_rate": 1.1259739080422116e-06, "loss": 0.5925, "step": 9954 }, { "epoch": 1.44, "grad_norm": 6.502480506896973, "learning_rate": 1.1258168848339922e-06, "loss": 0.5337, "step": 9955 }, { "epoch": 1.44, "grad_norm": 6.450140476226807, "learning_rate": 1.1256598584736354e-06, "loss": 0.5757, "step": 9956 }, { "epoch": 1.44, "grad_norm": 6.861716270446777, "learning_rate": 1.125502828965075e-06, "loss": 0.5503, "step": 9957 }, { "epoch": 1.44, "grad_norm": 7.697381496429443, "learning_rate": 1.1253457963122453e-06, "loss": 0.5544, "step": 9958 }, { "epoch": 1.45, "grad_norm": 7.133146286010742, "learning_rate": 1.1251887605190801e-06, "loss": 0.5535, "step": 9959 }, { "epoch": 1.45, "grad_norm": 6.879972457885742, "learning_rate": 1.125031721589514e-06, "loss": 0.5686, "step": 9960 }, { "epoch": 1.45, "grad_norm": 6.55883264541626, "learning_rate": 1.1248746795274813e-06, "loss": 0.5373, "step": 9961 }, { "epoch": 1.45, "grad_norm": 6.8267903327941895, "learning_rate": 1.1247176343369165e-06, "loss": 0.6116, "step": 9962 }, { "epoch": 1.45, "grad_norm": 7.055877208709717, "learning_rate": 1.1245605860217539e-06, "loss": 0.5651, "step": 9963 }, { "epoch": 1.45, "grad_norm": 8.331008911132812, "learning_rate": 1.1244035345859284e-06, "loss": 0.5427, "step": 9964 }, { "epoch": 1.45, "grad_norm": 7.03126335144043, "learning_rate": 1.1242464800333742e-06, "loss": 0.5694, "step": 9965 }, { "epoch": 1.45, "grad_norm": 6.137848854064941, "learning_rate": 1.1240894223680264e-06, "loss": 0.5704, "step": 9966 }, { "epoch": 1.45, "grad_norm": 6.871687412261963, "learning_rate": 1.1239323615938196e-06, "loss": 0.5324, "step": 9967 }, { "epoch": 1.45, "grad_norm": 6.249361991882324, "learning_rate": 1.1237752977146891e-06, "loss": 0.5815, "step": 9968 }, { "epoch": 1.45, "grad_norm": 6.0037641525268555, "learning_rate": 1.1236182307345696e-06, "loss": 0.6528, "step": 9969 }, { "epoch": 1.45, "grad_norm": 6.205370903015137, "learning_rate": 1.123461160657396e-06, "loss": 0.5764, "step": 9970 }, { "epoch": 1.45, "grad_norm": 6.7156829833984375, "learning_rate": 1.123304087487104e-06, "loss": 0.548, "step": 9971 }, { "epoch": 1.45, "grad_norm": 6.623602867126465, "learning_rate": 1.1231470112276283e-06, "loss": 0.647, "step": 9972 }, { "epoch": 1.45, "grad_norm": 6.46651029586792, "learning_rate": 1.122989931882904e-06, "loss": 0.6094, "step": 9973 }, { "epoch": 1.45, "grad_norm": 6.372010231018066, "learning_rate": 1.122832849456867e-06, "loss": 0.5204, "step": 9974 }, { "epoch": 1.45, "grad_norm": 6.373832702636719, "learning_rate": 1.1226757639534526e-06, "loss": 0.5162, "step": 9975 }, { "epoch": 1.45, "grad_norm": 5.873439311981201, "learning_rate": 1.1225186753765963e-06, "loss": 0.5283, "step": 9976 }, { "epoch": 1.45, "grad_norm": 6.603073596954346, "learning_rate": 1.1223615837302336e-06, "loss": 0.6053, "step": 9977 }, { "epoch": 1.45, "grad_norm": 6.915102481842041, "learning_rate": 1.1222044890183003e-06, "loss": 0.5595, "step": 9978 }, { "epoch": 1.45, "grad_norm": 7.70034646987915, "learning_rate": 1.1220473912447318e-06, "loss": 0.6579, "step": 9979 }, { "epoch": 1.45, "grad_norm": 6.856777191162109, "learning_rate": 1.1218902904134647e-06, "loss": 0.5734, "step": 9980 }, { "epoch": 1.45, "grad_norm": 6.196906566619873, "learning_rate": 1.121733186528434e-06, "loss": 0.5743, "step": 9981 }, { "epoch": 1.45, "grad_norm": 6.063878536224365, "learning_rate": 1.121576079593576e-06, "loss": 0.5386, "step": 9982 }, { "epoch": 1.45, "grad_norm": 6.575776100158691, "learning_rate": 1.1214189696128271e-06, "loss": 0.5661, "step": 9983 }, { "epoch": 1.45, "grad_norm": 6.571028709411621, "learning_rate": 1.1212618565901235e-06, "loss": 0.4815, "step": 9984 }, { "epoch": 1.45, "grad_norm": 5.854312896728516, "learning_rate": 1.1211047405294007e-06, "loss": 0.4951, "step": 9985 }, { "epoch": 1.45, "grad_norm": 7.786696434020996, "learning_rate": 1.1209476214345957e-06, "loss": 0.6357, "step": 9986 }, { "epoch": 1.45, "grad_norm": 6.364098072052002, "learning_rate": 1.1207904993096442e-06, "loss": 0.5175, "step": 9987 }, { "epoch": 1.45, "grad_norm": 7.286077976226807, "learning_rate": 1.1206333741584833e-06, "loss": 0.5812, "step": 9988 }, { "epoch": 1.45, "grad_norm": 6.325033187866211, "learning_rate": 1.120476245985049e-06, "loss": 0.4983, "step": 9989 }, { "epoch": 1.45, "grad_norm": 6.587315082550049, "learning_rate": 1.1203191147932783e-06, "loss": 0.5548, "step": 9990 }, { "epoch": 1.45, "grad_norm": 6.812355995178223, "learning_rate": 1.1201619805871078e-06, "loss": 0.5904, "step": 9991 }, { "epoch": 1.45, "grad_norm": 6.126346588134766, "learning_rate": 1.1200048433704738e-06, "loss": 0.5292, "step": 9992 }, { "epoch": 1.45, "grad_norm": 6.966406345367432, "learning_rate": 1.1198477031473136e-06, "loss": 0.5265, "step": 9993 }, { "epoch": 1.45, "grad_norm": 6.9428229331970215, "learning_rate": 1.1196905599215638e-06, "loss": 0.5904, "step": 9994 }, { "epoch": 1.45, "grad_norm": 5.863963603973389, "learning_rate": 1.1195334136971617e-06, "loss": 0.4993, "step": 9995 }, { "epoch": 1.45, "grad_norm": 6.533718109130859, "learning_rate": 1.119376264478044e-06, "loss": 0.4997, "step": 9996 }, { "epoch": 1.45, "grad_norm": 6.738943576812744, "learning_rate": 1.119219112268148e-06, "loss": 0.6092, "step": 9997 }, { "epoch": 1.45, "grad_norm": 6.054327011108398, "learning_rate": 1.1190619570714108e-06, "loss": 0.5375, "step": 9998 }, { "epoch": 1.45, "grad_norm": 6.640442371368408, "learning_rate": 1.1189047988917699e-06, "loss": 0.51, "step": 9999 }, { "epoch": 1.45, "grad_norm": 8.03310489654541, "learning_rate": 1.1187476377331624e-06, "loss": 0.5171, "step": 10000 }, { "epoch": 1.45, "grad_norm": 6.672735214233398, "learning_rate": 1.1185904735995257e-06, "loss": 0.5092, "step": 10001 }, { "epoch": 1.45, "grad_norm": 6.082245349884033, "learning_rate": 1.1184333064947973e-06, "loss": 0.4564, "step": 10002 }, { "epoch": 1.45, "grad_norm": 6.1782379150390625, "learning_rate": 1.1182761364229149e-06, "loss": 0.5689, "step": 10003 }, { "epoch": 1.45, "grad_norm": 6.269522190093994, "learning_rate": 1.1181189633878162e-06, "loss": 0.5872, "step": 10004 }, { "epoch": 1.45, "grad_norm": 6.035767555236816, "learning_rate": 1.1179617873934387e-06, "loss": 0.5378, "step": 10005 }, { "epoch": 1.45, "grad_norm": 5.867583274841309, "learning_rate": 1.1178046084437202e-06, "loss": 0.5177, "step": 10006 }, { "epoch": 1.45, "grad_norm": 6.487741470336914, "learning_rate": 1.1176474265425987e-06, "loss": 0.5287, "step": 10007 }, { "epoch": 1.45, "grad_norm": 5.91945219039917, "learning_rate": 1.117490241694012e-06, "loss": 0.5456, "step": 10008 }, { "epoch": 1.45, "grad_norm": 6.61137056350708, "learning_rate": 1.1173330539018983e-06, "loss": 0.4909, "step": 10009 }, { "epoch": 1.45, "grad_norm": 6.625041961669922, "learning_rate": 1.1171758631701955e-06, "loss": 0.4644, "step": 10010 }, { "epoch": 1.45, "grad_norm": 6.781199932098389, "learning_rate": 1.117018669502842e-06, "loss": 0.5965, "step": 10011 }, { "epoch": 1.45, "grad_norm": 5.881963729858398, "learning_rate": 1.116861472903776e-06, "loss": 0.4989, "step": 10012 }, { "epoch": 1.45, "grad_norm": 5.931802749633789, "learning_rate": 1.1167042733769354e-06, "loss": 0.468, "step": 10013 }, { "epoch": 1.45, "grad_norm": 6.344721794128418, "learning_rate": 1.116547070926259e-06, "loss": 0.6171, "step": 10014 }, { "epoch": 1.45, "grad_norm": 6.253840446472168, "learning_rate": 1.1163898655556852e-06, "loss": 0.4885, "step": 10015 }, { "epoch": 1.45, "grad_norm": 7.016494274139404, "learning_rate": 1.1162326572691521e-06, "loss": 0.5373, "step": 10016 }, { "epoch": 1.45, "grad_norm": 5.778831958770752, "learning_rate": 1.1160754460705988e-06, "loss": 0.4763, "step": 10017 }, { "epoch": 1.45, "grad_norm": 6.844779014587402, "learning_rate": 1.1159182319639637e-06, "loss": 0.4921, "step": 10018 }, { "epoch": 1.45, "grad_norm": 5.96248722076416, "learning_rate": 1.1157610149531859e-06, "loss": 0.5114, "step": 10019 }, { "epoch": 1.45, "grad_norm": 6.78066349029541, "learning_rate": 1.115603795042204e-06, "loss": 0.5679, "step": 10020 }, { "epoch": 1.45, "grad_norm": 8.740741729736328, "learning_rate": 1.1154465722349567e-06, "loss": 0.6516, "step": 10021 }, { "epoch": 1.45, "grad_norm": 5.993765354156494, "learning_rate": 1.1152893465353829e-06, "loss": 0.4958, "step": 10022 }, { "epoch": 1.45, "grad_norm": 6.711301803588867, "learning_rate": 1.115132117947422e-06, "loss": 0.5047, "step": 10023 }, { "epoch": 1.45, "grad_norm": 6.651808738708496, "learning_rate": 1.114974886475013e-06, "loss": 0.5706, "step": 10024 }, { "epoch": 1.45, "grad_norm": 6.391470909118652, "learning_rate": 1.114817652122095e-06, "loss": 0.4875, "step": 10025 }, { "epoch": 1.45, "grad_norm": 7.162028789520264, "learning_rate": 1.1146604148926074e-06, "loss": 0.5044, "step": 10026 }, { "epoch": 1.45, "grad_norm": 6.519448757171631, "learning_rate": 1.1145031747904894e-06, "loss": 0.5496, "step": 10027 }, { "epoch": 1.46, "grad_norm": 6.834244251251221, "learning_rate": 1.1143459318196803e-06, "loss": 0.5928, "step": 10028 }, { "epoch": 1.46, "grad_norm": 6.514174461364746, "learning_rate": 1.1141886859841197e-06, "loss": 0.5811, "step": 10029 }, { "epoch": 1.46, "grad_norm": 6.429947853088379, "learning_rate": 1.114031437287747e-06, "loss": 0.5252, "step": 10030 }, { "epoch": 1.46, "grad_norm": 6.5721659660339355, "learning_rate": 1.113874185734502e-06, "loss": 0.5366, "step": 10031 }, { "epoch": 1.46, "grad_norm": 6.081449031829834, "learning_rate": 1.1137169313283243e-06, "loss": 0.5816, "step": 10032 }, { "epoch": 1.46, "grad_norm": 7.440460205078125, "learning_rate": 1.1135596740731536e-06, "loss": 0.6212, "step": 10033 }, { "epoch": 1.46, "grad_norm": 6.263105869293213, "learning_rate": 1.11340241397293e-06, "loss": 0.471, "step": 10034 }, { "epoch": 1.46, "grad_norm": 6.5580620765686035, "learning_rate": 1.113245151031593e-06, "loss": 0.5443, "step": 10035 }, { "epoch": 1.46, "grad_norm": 6.802889347076416, "learning_rate": 1.1130878852530828e-06, "loss": 0.5249, "step": 10036 }, { "epoch": 1.46, "grad_norm": 5.522132396697998, "learning_rate": 1.1129306166413393e-06, "loss": 0.4886, "step": 10037 }, { "epoch": 1.46, "grad_norm": 6.797201156616211, "learning_rate": 1.1127733452003025e-06, "loss": 0.5971, "step": 10038 }, { "epoch": 1.46, "grad_norm": 6.199737071990967, "learning_rate": 1.1126160709339131e-06, "loss": 0.5111, "step": 10039 }, { "epoch": 1.46, "grad_norm": 6.675110816955566, "learning_rate": 1.1124587938461112e-06, "loss": 0.5811, "step": 10040 }, { "epoch": 1.46, "grad_norm": 6.683038234710693, "learning_rate": 1.1123015139408368e-06, "loss": 0.5867, "step": 10041 }, { "epoch": 1.46, "grad_norm": 6.258480548858643, "learning_rate": 1.1121442312220302e-06, "loss": 0.501, "step": 10042 }, { "epoch": 1.46, "grad_norm": 5.87899923324585, "learning_rate": 1.1119869456936322e-06, "loss": 0.5246, "step": 10043 }, { "epoch": 1.46, "grad_norm": 6.43309211730957, "learning_rate": 1.1118296573595833e-06, "loss": 0.4961, "step": 10044 }, { "epoch": 1.46, "grad_norm": 7.300354957580566, "learning_rate": 1.1116723662238238e-06, "loss": 0.5736, "step": 10045 }, { "epoch": 1.46, "grad_norm": 6.62031364440918, "learning_rate": 1.1115150722902948e-06, "loss": 0.5036, "step": 10046 }, { "epoch": 1.46, "grad_norm": 6.1950836181640625, "learning_rate": 1.1113577755629374e-06, "loss": 0.5831, "step": 10047 }, { "epoch": 1.46, "grad_norm": 6.635744571685791, "learning_rate": 1.1112004760456912e-06, "loss": 0.576, "step": 10048 }, { "epoch": 1.46, "grad_norm": 6.385504245758057, "learning_rate": 1.111043173742498e-06, "loss": 0.5002, "step": 10049 }, { "epoch": 1.46, "grad_norm": 6.923097610473633, "learning_rate": 1.1108858686572982e-06, "loss": 0.5241, "step": 10050 }, { "epoch": 1.46, "grad_norm": 6.044357776641846, "learning_rate": 1.1107285607940333e-06, "loss": 0.5243, "step": 10051 }, { "epoch": 1.46, "grad_norm": 6.844362735748291, "learning_rate": 1.1105712501566442e-06, "loss": 0.5482, "step": 10052 }, { "epoch": 1.46, "grad_norm": 6.1753249168396, "learning_rate": 1.1104139367490725e-06, "loss": 0.5031, "step": 10053 }, { "epoch": 1.46, "grad_norm": 6.5615153312683105, "learning_rate": 1.1102566205752586e-06, "loss": 0.582, "step": 10054 }, { "epoch": 1.46, "grad_norm": 6.384085178375244, "learning_rate": 1.1100993016391442e-06, "loss": 0.5243, "step": 10055 }, { "epoch": 1.46, "grad_norm": 7.145779132843018, "learning_rate": 1.1099419799446707e-06, "loss": 0.6497, "step": 10056 }, { "epoch": 1.46, "grad_norm": 6.282424449920654, "learning_rate": 1.1097846554957797e-06, "loss": 0.5615, "step": 10057 }, { "epoch": 1.46, "grad_norm": 6.361376762390137, "learning_rate": 1.1096273282964126e-06, "loss": 0.5406, "step": 10058 }, { "epoch": 1.46, "grad_norm": 7.024050235748291, "learning_rate": 1.1094699983505106e-06, "loss": 0.555, "step": 10059 }, { "epoch": 1.46, "grad_norm": 7.300919532775879, "learning_rate": 1.1093126656620158e-06, "loss": 0.5557, "step": 10060 }, { "epoch": 1.46, "grad_norm": 7.050502777099609, "learning_rate": 1.10915533023487e-06, "loss": 0.5869, "step": 10061 }, { "epoch": 1.46, "grad_norm": 6.389479160308838, "learning_rate": 1.1089979920730145e-06, "loss": 0.5587, "step": 10062 }, { "epoch": 1.46, "grad_norm": 6.043205261230469, "learning_rate": 1.1088406511803914e-06, "loss": 0.4909, "step": 10063 }, { "epoch": 1.46, "grad_norm": 6.384975910186768, "learning_rate": 1.1086833075609425e-06, "loss": 0.6084, "step": 10064 }, { "epoch": 1.46, "grad_norm": 6.4994964599609375, "learning_rate": 1.1085259612186102e-06, "loss": 0.5073, "step": 10065 }, { "epoch": 1.46, "grad_norm": 6.9176435470581055, "learning_rate": 1.1083686121573363e-06, "loss": 0.527, "step": 10066 }, { "epoch": 1.46, "grad_norm": 5.7777252197265625, "learning_rate": 1.1082112603810628e-06, "loss": 0.5435, "step": 10067 }, { "epoch": 1.46, "grad_norm": 6.476013660430908, "learning_rate": 1.1080539058937322e-06, "loss": 0.4954, "step": 10068 }, { "epoch": 1.46, "grad_norm": 6.030767440795898, "learning_rate": 1.1078965486992862e-06, "loss": 0.519, "step": 10069 }, { "epoch": 1.46, "grad_norm": 7.6231231689453125, "learning_rate": 1.1077391888016676e-06, "loss": 0.591, "step": 10070 }, { "epoch": 1.46, "grad_norm": 6.913661956787109, "learning_rate": 1.1075818262048188e-06, "loss": 0.5501, "step": 10071 }, { "epoch": 1.46, "grad_norm": 6.553758144378662, "learning_rate": 1.1074244609126822e-06, "loss": 0.5908, "step": 10072 }, { "epoch": 1.46, "grad_norm": 6.883113384246826, "learning_rate": 1.1072670929292e-06, "loss": 0.4891, "step": 10073 }, { "epoch": 1.46, "grad_norm": 6.151526927947998, "learning_rate": 1.1071097222583153e-06, "loss": 0.4994, "step": 10074 }, { "epoch": 1.46, "grad_norm": 6.813228130340576, "learning_rate": 1.1069523489039708e-06, "loss": 0.59, "step": 10075 }, { "epoch": 1.46, "grad_norm": 6.508655548095703, "learning_rate": 1.1067949728701086e-06, "loss": 0.64, "step": 10076 }, { "epoch": 1.46, "grad_norm": 7.372668266296387, "learning_rate": 1.106637594160672e-06, "loss": 0.6284, "step": 10077 }, { "epoch": 1.46, "grad_norm": 6.092964172363281, "learning_rate": 1.1064802127796033e-06, "loss": 0.5569, "step": 10078 }, { "epoch": 1.46, "grad_norm": 6.486785888671875, "learning_rate": 1.1063228287308464e-06, "loss": 0.5248, "step": 10079 }, { "epoch": 1.46, "grad_norm": 7.087404727935791, "learning_rate": 1.1061654420183434e-06, "loss": 0.6168, "step": 10080 }, { "epoch": 1.46, "grad_norm": 6.382941722869873, "learning_rate": 1.1060080526460382e-06, "loss": 0.51, "step": 10081 }, { "epoch": 1.46, "grad_norm": 6.870300769805908, "learning_rate": 1.1058506606178735e-06, "loss": 0.5721, "step": 10082 }, { "epoch": 1.46, "grad_norm": 6.82058048248291, "learning_rate": 1.1056932659377922e-06, "loss": 0.5905, "step": 10083 }, { "epoch": 1.46, "grad_norm": 6.593186855316162, "learning_rate": 1.1055358686097379e-06, "loss": 0.5307, "step": 10084 }, { "epoch": 1.46, "grad_norm": 7.035054683685303, "learning_rate": 1.1053784686376539e-06, "loss": 0.5998, "step": 10085 }, { "epoch": 1.46, "grad_norm": 7.051689624786377, "learning_rate": 1.1052210660254834e-06, "loss": 0.5173, "step": 10086 }, { "epoch": 1.46, "grad_norm": 6.601240158081055, "learning_rate": 1.1050636607771702e-06, "loss": 0.5335, "step": 10087 }, { "epoch": 1.46, "grad_norm": 5.732047080993652, "learning_rate": 1.1049062528966579e-06, "loss": 0.5161, "step": 10088 }, { "epoch": 1.46, "grad_norm": 5.774315357208252, "learning_rate": 1.1047488423878898e-06, "loss": 0.5312, "step": 10089 }, { "epoch": 1.46, "grad_norm": 6.644362926483154, "learning_rate": 1.1045914292548094e-06, "loss": 0.4539, "step": 10090 }, { "epoch": 1.46, "grad_norm": 5.891959190368652, "learning_rate": 1.104434013501361e-06, "loss": 0.5174, "step": 10091 }, { "epoch": 1.46, "grad_norm": 6.330141544342041, "learning_rate": 1.1042765951314878e-06, "loss": 0.465, "step": 10092 }, { "epoch": 1.46, "grad_norm": 7.413485527038574, "learning_rate": 1.1041191741491342e-06, "loss": 0.5707, "step": 10093 }, { "epoch": 1.46, "grad_norm": 6.2731122970581055, "learning_rate": 1.1039617505582437e-06, "loss": 0.5064, "step": 10094 }, { "epoch": 1.46, "grad_norm": 6.027472019195557, "learning_rate": 1.1038043243627606e-06, "loss": 0.4964, "step": 10095 }, { "epoch": 1.46, "grad_norm": 6.5127387046813965, "learning_rate": 1.103646895566629e-06, "loss": 0.5314, "step": 10096 }, { "epoch": 1.47, "grad_norm": 6.081596374511719, "learning_rate": 1.1034894641737923e-06, "loss": 0.4783, "step": 10097 }, { "epoch": 1.47, "grad_norm": 6.521810054779053, "learning_rate": 1.1033320301881957e-06, "loss": 0.5673, "step": 10098 }, { "epoch": 1.47, "grad_norm": 6.16686487197876, "learning_rate": 1.1031745936137827e-06, "loss": 0.4882, "step": 10099 }, { "epoch": 1.47, "grad_norm": 7.516420841217041, "learning_rate": 1.103017154454498e-06, "loss": 0.6048, "step": 10100 }, { "epoch": 1.47, "grad_norm": 7.0428290367126465, "learning_rate": 1.1028597127142858e-06, "loss": 0.5934, "step": 10101 }, { "epoch": 1.47, "grad_norm": 6.086039066314697, "learning_rate": 1.1027022683970905e-06, "loss": 0.498, "step": 10102 }, { "epoch": 1.47, "grad_norm": 6.602956771850586, "learning_rate": 1.1025448215068574e-06, "loss": 0.5396, "step": 10103 }, { "epoch": 1.47, "grad_norm": 6.079061508178711, "learning_rate": 1.10238737204753e-06, "loss": 0.5356, "step": 10104 }, { "epoch": 1.47, "grad_norm": 6.801515102386475, "learning_rate": 1.1022299200230532e-06, "loss": 0.4977, "step": 10105 }, { "epoch": 1.47, "grad_norm": 6.76248025894165, "learning_rate": 1.1020724654373718e-06, "loss": 0.4872, "step": 10106 }, { "epoch": 1.47, "grad_norm": 6.3818535804748535, "learning_rate": 1.1019150082944308e-06, "loss": 0.5058, "step": 10107 }, { "epoch": 1.47, "grad_norm": 6.281378269195557, "learning_rate": 1.101757548598175e-06, "loss": 0.6143, "step": 10108 }, { "epoch": 1.47, "grad_norm": 6.376180171966553, "learning_rate": 1.1016000863525487e-06, "loss": 0.4927, "step": 10109 }, { "epoch": 1.47, "grad_norm": 6.605423927307129, "learning_rate": 1.101442621561498e-06, "loss": 0.5532, "step": 10110 }, { "epoch": 1.47, "grad_norm": 6.3866143226623535, "learning_rate": 1.1012851542289666e-06, "loss": 0.5489, "step": 10111 }, { "epoch": 1.47, "grad_norm": 6.873802661895752, "learning_rate": 1.1011276843589008e-06, "loss": 0.7063, "step": 10112 }, { "epoch": 1.47, "grad_norm": 5.896275520324707, "learning_rate": 1.1009702119552445e-06, "loss": 0.5745, "step": 10113 }, { "epoch": 1.47, "grad_norm": 6.336721420288086, "learning_rate": 1.100812737021944e-06, "loss": 0.5689, "step": 10114 }, { "epoch": 1.47, "grad_norm": 6.896685600280762, "learning_rate": 1.1006552595629441e-06, "loss": 0.5185, "step": 10115 }, { "epoch": 1.47, "grad_norm": 5.8853960037231445, "learning_rate": 1.1004977795821905e-06, "loss": 0.5691, "step": 10116 }, { "epoch": 1.47, "grad_norm": 6.014402389526367, "learning_rate": 1.1003402970836282e-06, "loss": 0.5255, "step": 10117 }, { "epoch": 1.47, "grad_norm": 5.851696491241455, "learning_rate": 1.1001828120712026e-06, "loss": 0.5116, "step": 10118 }, { "epoch": 1.47, "grad_norm": 6.602888584136963, "learning_rate": 1.1000253245488596e-06, "loss": 0.5728, "step": 10119 }, { "epoch": 1.47, "grad_norm": 5.9432268142700195, "learning_rate": 1.0998678345205446e-06, "loss": 0.5088, "step": 10120 }, { "epoch": 1.47, "grad_norm": 5.814721584320068, "learning_rate": 1.0997103419902033e-06, "loss": 0.5612, "step": 10121 }, { "epoch": 1.47, "grad_norm": 6.966043472290039, "learning_rate": 1.099552846961781e-06, "loss": 0.6026, "step": 10122 }, { "epoch": 1.47, "grad_norm": 5.726439476013184, "learning_rate": 1.0993953494392245e-06, "loss": 0.4404, "step": 10123 }, { "epoch": 1.47, "grad_norm": 6.8403143882751465, "learning_rate": 1.0992378494264787e-06, "loss": 0.5445, "step": 10124 }, { "epoch": 1.47, "grad_norm": 6.419081211090088, "learning_rate": 1.09908034692749e-06, "loss": 0.5373, "step": 10125 }, { "epoch": 1.47, "grad_norm": 6.322958469390869, "learning_rate": 1.0989228419462037e-06, "loss": 0.5586, "step": 10126 }, { "epoch": 1.47, "grad_norm": 6.821237087249756, "learning_rate": 1.0987653344865666e-06, "loss": 0.5385, "step": 10127 }, { "epoch": 1.47, "grad_norm": 6.430995464324951, "learning_rate": 1.0986078245525248e-06, "loss": 0.511, "step": 10128 }, { "epoch": 1.47, "grad_norm": 5.948526382446289, "learning_rate": 1.0984503121480238e-06, "loss": 0.498, "step": 10129 }, { "epoch": 1.47, "grad_norm": 6.049070835113525, "learning_rate": 1.0982927972770104e-06, "loss": 0.5264, "step": 10130 }, { "epoch": 1.47, "grad_norm": 6.5892415046691895, "learning_rate": 1.0981352799434307e-06, "loss": 0.5172, "step": 10131 }, { "epoch": 1.47, "grad_norm": 6.383637428283691, "learning_rate": 1.097977760151231e-06, "loss": 0.533, "step": 10132 }, { "epoch": 1.47, "grad_norm": 6.74368143081665, "learning_rate": 1.0978202379043575e-06, "loss": 0.5621, "step": 10133 }, { "epoch": 1.47, "grad_norm": 7.72568416595459, "learning_rate": 1.097662713206757e-06, "loss": 0.5423, "step": 10134 }, { "epoch": 1.47, "grad_norm": 6.135931968688965, "learning_rate": 1.097505186062376e-06, "loss": 0.5354, "step": 10135 }, { "epoch": 1.47, "grad_norm": 7.415955543518066, "learning_rate": 1.0973476564751608e-06, "loss": 0.6924, "step": 10136 }, { "epoch": 1.47, "grad_norm": 7.391045570373535, "learning_rate": 1.0971901244490583e-06, "loss": 0.5797, "step": 10137 }, { "epoch": 1.47, "grad_norm": 6.8146514892578125, "learning_rate": 1.0970325899880156e-06, "loss": 0.5888, "step": 10138 }, { "epoch": 1.47, "grad_norm": 6.3724236488342285, "learning_rate": 1.0968750530959784e-06, "loss": 0.5252, "step": 10139 }, { "epoch": 1.47, "grad_norm": 6.551000595092773, "learning_rate": 1.0967175137768945e-06, "loss": 0.5399, "step": 10140 }, { "epoch": 1.47, "grad_norm": 7.063570976257324, "learning_rate": 1.0965599720347102e-06, "loss": 0.5977, "step": 10141 }, { "epoch": 1.47, "grad_norm": 6.3149847984313965, "learning_rate": 1.0964024278733727e-06, "loss": 0.5083, "step": 10142 }, { "epoch": 1.47, "grad_norm": 7.623045444488525, "learning_rate": 1.0962448812968292e-06, "loss": 0.5782, "step": 10143 }, { "epoch": 1.47, "grad_norm": 7.021865367889404, "learning_rate": 1.0960873323090268e-06, "loss": 0.5351, "step": 10144 }, { "epoch": 1.47, "grad_norm": 5.8623762130737305, "learning_rate": 1.0959297809139121e-06, "loss": 0.5186, "step": 10145 }, { "epoch": 1.47, "grad_norm": 6.940506458282471, "learning_rate": 1.0957722271154327e-06, "loss": 0.6057, "step": 10146 }, { "epoch": 1.47, "grad_norm": 6.284700393676758, "learning_rate": 1.0956146709175355e-06, "loss": 0.5456, "step": 10147 }, { "epoch": 1.47, "grad_norm": 5.704992771148682, "learning_rate": 1.0954571123241682e-06, "loss": 0.4459, "step": 10148 }, { "epoch": 1.47, "grad_norm": 6.897886753082275, "learning_rate": 1.0952995513392783e-06, "loss": 0.5579, "step": 10149 }, { "epoch": 1.47, "grad_norm": 5.888372898101807, "learning_rate": 1.0951419879668126e-06, "loss": 0.5099, "step": 10150 }, { "epoch": 1.47, "grad_norm": 6.413941383361816, "learning_rate": 1.0949844222107197e-06, "loss": 0.4899, "step": 10151 }, { "epoch": 1.47, "grad_norm": 6.4542317390441895, "learning_rate": 1.0948268540749458e-06, "loss": 0.5978, "step": 10152 }, { "epoch": 1.47, "grad_norm": 6.853118419647217, "learning_rate": 1.0946692835634393e-06, "loss": 0.6078, "step": 10153 }, { "epoch": 1.47, "grad_norm": 6.13970422744751, "learning_rate": 1.0945117106801475e-06, "loss": 0.5488, "step": 10154 }, { "epoch": 1.47, "grad_norm": 5.778785705566406, "learning_rate": 1.0943541354290186e-06, "loss": 0.5407, "step": 10155 }, { "epoch": 1.47, "grad_norm": 6.74508810043335, "learning_rate": 1.0941965578140003e-06, "loss": 0.6086, "step": 10156 }, { "epoch": 1.47, "grad_norm": 6.803646564483643, "learning_rate": 1.09403897783904e-06, "loss": 0.5735, "step": 10157 }, { "epoch": 1.47, "grad_norm": 6.176517963409424, "learning_rate": 1.093881395508086e-06, "loss": 0.5353, "step": 10158 }, { "epoch": 1.47, "grad_norm": 6.6162109375, "learning_rate": 1.0937238108250863e-06, "loss": 0.5076, "step": 10159 }, { "epoch": 1.47, "grad_norm": 5.920779228210449, "learning_rate": 1.0935662237939888e-06, "loss": 0.4729, "step": 10160 }, { "epoch": 1.47, "grad_norm": 6.345844268798828, "learning_rate": 1.0934086344187413e-06, "loss": 0.5273, "step": 10161 }, { "epoch": 1.47, "grad_norm": 7.277969837188721, "learning_rate": 1.0932510427032923e-06, "loss": 0.5558, "step": 10162 }, { "epoch": 1.47, "grad_norm": 6.486310958862305, "learning_rate": 1.0930934486515902e-06, "loss": 0.5382, "step": 10163 }, { "epoch": 1.47, "grad_norm": 6.993760585784912, "learning_rate": 1.092935852267583e-06, "loss": 0.4627, "step": 10164 }, { "epoch": 1.47, "grad_norm": 6.341038227081299, "learning_rate": 1.0927782535552186e-06, "loss": 0.518, "step": 10165 }, { "epoch": 1.48, "grad_norm": 6.635656833648682, "learning_rate": 1.0926206525184463e-06, "loss": 0.5412, "step": 10166 }, { "epoch": 1.48, "grad_norm": 7.667259693145752, "learning_rate": 1.0924630491612137e-06, "loss": 0.5472, "step": 10167 }, { "epoch": 1.48, "grad_norm": 7.669344902038574, "learning_rate": 1.0923054434874696e-06, "loss": 0.6255, "step": 10168 }, { "epoch": 1.48, "grad_norm": 7.730522155761719, "learning_rate": 1.0921478355011628e-06, "loss": 0.5761, "step": 10169 }, { "epoch": 1.48, "grad_norm": 6.57138204574585, "learning_rate": 1.0919902252062418e-06, "loss": 0.4799, "step": 10170 }, { "epoch": 1.48, "grad_norm": 6.414452075958252, "learning_rate": 1.091832612606655e-06, "loss": 0.58, "step": 10171 }, { "epoch": 1.48, "grad_norm": 7.0837016105651855, "learning_rate": 1.0916749977063515e-06, "loss": 0.5957, "step": 10172 }, { "epoch": 1.48, "grad_norm": 7.317947864532471, "learning_rate": 1.0915173805092798e-06, "loss": 0.5754, "step": 10173 }, { "epoch": 1.48, "grad_norm": 5.544605731964111, "learning_rate": 1.0913597610193887e-06, "loss": 0.4885, "step": 10174 }, { "epoch": 1.48, "grad_norm": 6.664795398712158, "learning_rate": 1.0912021392406275e-06, "loss": 0.5223, "step": 10175 }, { "epoch": 1.48, "grad_norm": 5.506164073944092, "learning_rate": 1.0910445151769446e-06, "loss": 0.4844, "step": 10176 }, { "epoch": 1.48, "grad_norm": 5.557109355926514, "learning_rate": 1.0908868888322894e-06, "loss": 0.515, "step": 10177 }, { "epoch": 1.48, "grad_norm": 5.984297752380371, "learning_rate": 1.0907292602106108e-06, "loss": 0.567, "step": 10178 }, { "epoch": 1.48, "grad_norm": 6.427185535430908, "learning_rate": 1.0905716293158583e-06, "loss": 0.533, "step": 10179 }, { "epoch": 1.48, "grad_norm": 6.397861957550049, "learning_rate": 1.0904139961519805e-06, "loss": 0.4993, "step": 10180 }, { "epoch": 1.48, "grad_norm": 6.251540184020996, "learning_rate": 1.0902563607229272e-06, "loss": 0.4772, "step": 10181 }, { "epoch": 1.48, "grad_norm": 6.2303690910339355, "learning_rate": 1.090098723032647e-06, "loss": 0.5333, "step": 10182 }, { "epoch": 1.48, "grad_norm": 6.5582275390625, "learning_rate": 1.08994108308509e-06, "loss": 0.5364, "step": 10183 }, { "epoch": 1.48, "grad_norm": 7.00711727142334, "learning_rate": 1.0897834408842052e-06, "loss": 0.5911, "step": 10184 }, { "epoch": 1.48, "grad_norm": 5.957240104675293, "learning_rate": 1.089625796433942e-06, "loss": 0.5221, "step": 10185 }, { "epoch": 1.48, "grad_norm": 7.15818977355957, "learning_rate": 1.0894681497382505e-06, "loss": 0.571, "step": 10186 }, { "epoch": 1.48, "grad_norm": 6.295886993408203, "learning_rate": 1.0893105008010798e-06, "loss": 0.5649, "step": 10187 }, { "epoch": 1.48, "grad_norm": 7.004045486450195, "learning_rate": 1.0891528496263796e-06, "loss": 0.5656, "step": 10188 }, { "epoch": 1.48, "grad_norm": 6.5319504737854, "learning_rate": 1.0889951962180994e-06, "loss": 0.564, "step": 10189 }, { "epoch": 1.48, "grad_norm": 6.282661437988281, "learning_rate": 1.0888375405801893e-06, "loss": 0.5221, "step": 10190 }, { "epoch": 1.48, "grad_norm": 5.973755836486816, "learning_rate": 1.088679882716599e-06, "loss": 0.5151, "step": 10191 }, { "epoch": 1.48, "grad_norm": 7.801616191864014, "learning_rate": 1.0885222226312782e-06, "loss": 0.6505, "step": 10192 }, { "epoch": 1.48, "grad_norm": 6.773520469665527, "learning_rate": 1.088364560328177e-06, "loss": 0.5827, "step": 10193 }, { "epoch": 1.48, "grad_norm": 6.219058990478516, "learning_rate": 1.0882068958112453e-06, "loss": 0.4926, "step": 10194 }, { "epoch": 1.48, "grad_norm": 6.547220706939697, "learning_rate": 1.0880492290844333e-06, "loss": 0.4917, "step": 10195 }, { "epoch": 1.48, "grad_norm": 5.909627437591553, "learning_rate": 1.0878915601516908e-06, "loss": 0.4969, "step": 10196 }, { "epoch": 1.48, "grad_norm": 6.003441333770752, "learning_rate": 1.0877338890169677e-06, "loss": 0.5059, "step": 10197 }, { "epoch": 1.48, "grad_norm": 6.503777027130127, "learning_rate": 1.0875762156842152e-06, "loss": 0.5905, "step": 10198 }, { "epoch": 1.48, "grad_norm": 6.822607517242432, "learning_rate": 1.0874185401573826e-06, "loss": 0.5046, "step": 10199 }, { "epoch": 1.48, "grad_norm": 5.93223762512207, "learning_rate": 1.0872608624404208e-06, "loss": 0.5211, "step": 10200 }, { "epoch": 1.48, "grad_norm": 6.526012897491455, "learning_rate": 1.0871031825372796e-06, "loss": 0.5331, "step": 10201 }, { "epoch": 1.48, "grad_norm": 6.857247829437256, "learning_rate": 1.0869455004519097e-06, "loss": 0.5299, "step": 10202 }, { "epoch": 1.48, "grad_norm": 6.626195430755615, "learning_rate": 1.0867878161882615e-06, "loss": 0.5638, "step": 10203 }, { "epoch": 1.48, "grad_norm": 6.6638407707214355, "learning_rate": 1.0866301297502857e-06, "loss": 0.5286, "step": 10204 }, { "epoch": 1.48, "grad_norm": 8.221009254455566, "learning_rate": 1.0864724411419328e-06, "loss": 0.7152, "step": 10205 }, { "epoch": 1.48, "grad_norm": 6.673225402832031, "learning_rate": 1.0863147503671532e-06, "loss": 0.5065, "step": 10206 }, { "epoch": 1.48, "grad_norm": 6.447088241577148, "learning_rate": 1.086157057429898e-06, "loss": 0.5457, "step": 10207 }, { "epoch": 1.48, "grad_norm": 6.442933559417725, "learning_rate": 1.0859993623341177e-06, "loss": 0.5515, "step": 10208 }, { "epoch": 1.48, "grad_norm": 6.229515552520752, "learning_rate": 1.085841665083763e-06, "loss": 0.5227, "step": 10209 }, { "epoch": 1.48, "grad_norm": 6.436028957366943, "learning_rate": 1.085683965682785e-06, "loss": 0.5746, "step": 10210 }, { "epoch": 1.48, "grad_norm": 6.835520267486572, "learning_rate": 1.0855262641351342e-06, "loss": 0.6829, "step": 10211 }, { "epoch": 1.48, "grad_norm": 7.130603790283203, "learning_rate": 1.085368560444762e-06, "loss": 0.4452, "step": 10212 }, { "epoch": 1.48, "grad_norm": 6.459794998168945, "learning_rate": 1.085210854615619e-06, "loss": 0.5273, "step": 10213 }, { "epoch": 1.48, "grad_norm": 5.932445526123047, "learning_rate": 1.0850531466516568e-06, "loss": 0.443, "step": 10214 }, { "epoch": 1.48, "grad_norm": 6.509034156799316, "learning_rate": 1.0848954365568263e-06, "loss": 0.5711, "step": 10215 }, { "epoch": 1.48, "grad_norm": 6.247066497802734, "learning_rate": 1.0847377243350784e-06, "loss": 0.5438, "step": 10216 }, { "epoch": 1.48, "grad_norm": 7.12847900390625, "learning_rate": 1.0845800099903642e-06, "loss": 0.5612, "step": 10217 }, { "epoch": 1.48, "grad_norm": 6.0460524559021, "learning_rate": 1.0844222935266353e-06, "loss": 0.4676, "step": 10218 }, { "epoch": 1.48, "grad_norm": 6.30131721496582, "learning_rate": 1.0842645749478434e-06, "loss": 0.6006, "step": 10219 }, { "epoch": 1.48, "grad_norm": 6.859350204467773, "learning_rate": 1.0841068542579398e-06, "loss": 0.5249, "step": 10220 }, { "epoch": 1.48, "grad_norm": 7.361735820770264, "learning_rate": 1.0839491314608748e-06, "loss": 0.5918, "step": 10221 }, { "epoch": 1.48, "grad_norm": 6.484634876251221, "learning_rate": 1.0837914065606014e-06, "loss": 0.5091, "step": 10222 }, { "epoch": 1.48, "grad_norm": 6.542601585388184, "learning_rate": 1.08363367956107e-06, "loss": 0.5559, "step": 10223 }, { "epoch": 1.48, "grad_norm": 6.402290344238281, "learning_rate": 1.0834759504662326e-06, "loss": 0.5148, "step": 10224 }, { "epoch": 1.48, "grad_norm": 5.899686336517334, "learning_rate": 1.0833182192800411e-06, "loss": 0.6122, "step": 10225 }, { "epoch": 1.48, "grad_norm": 6.614558219909668, "learning_rate": 1.0831604860064469e-06, "loss": 0.6304, "step": 10226 }, { "epoch": 1.48, "grad_norm": 6.212783336639404, "learning_rate": 1.083002750649402e-06, "loss": 0.4859, "step": 10227 }, { "epoch": 1.48, "grad_norm": 6.525177478790283, "learning_rate": 1.082845013212858e-06, "loss": 0.5122, "step": 10228 }, { "epoch": 1.48, "grad_norm": 6.30770206451416, "learning_rate": 1.0826872737007668e-06, "loss": 0.5284, "step": 10229 }, { "epoch": 1.48, "grad_norm": 7.486073017120361, "learning_rate": 1.0825295321170805e-06, "loss": 0.5702, "step": 10230 }, { "epoch": 1.48, "grad_norm": 6.747082233428955, "learning_rate": 1.0823717884657505e-06, "loss": 0.5668, "step": 10231 }, { "epoch": 1.48, "grad_norm": 6.696442604064941, "learning_rate": 1.0822140427507294e-06, "loss": 0.5416, "step": 10232 }, { "epoch": 1.48, "grad_norm": 6.250690937042236, "learning_rate": 1.0820562949759688e-06, "loss": 0.4781, "step": 10233 }, { "epoch": 1.48, "grad_norm": 6.955477237701416, "learning_rate": 1.0818985451454215e-06, "loss": 0.5841, "step": 10234 }, { "epoch": 1.49, "grad_norm": 6.211015701293945, "learning_rate": 1.081740793263039e-06, "loss": 0.5083, "step": 10235 }, { "epoch": 1.49, "grad_norm": 6.393054485321045, "learning_rate": 1.0815830393327736e-06, "loss": 0.5827, "step": 10236 }, { "epoch": 1.49, "grad_norm": 6.477139472961426, "learning_rate": 1.0814252833585777e-06, "loss": 0.5795, "step": 10237 }, { "epoch": 1.49, "grad_norm": 6.354086875915527, "learning_rate": 1.081267525344404e-06, "loss": 0.5595, "step": 10238 }, { "epoch": 1.49, "grad_norm": 6.194551467895508, "learning_rate": 1.081109765294204e-06, "loss": 0.5217, "step": 10239 }, { "epoch": 1.49, "grad_norm": 6.516597747802734, "learning_rate": 1.0809520032119308e-06, "loss": 0.5956, "step": 10240 }, { "epoch": 1.49, "grad_norm": 6.6956000328063965, "learning_rate": 1.0807942391015369e-06, "loss": 0.5916, "step": 10241 }, { "epoch": 1.49, "grad_norm": 6.029544353485107, "learning_rate": 1.0806364729669745e-06, "loss": 0.5451, "step": 10242 }, { "epoch": 1.49, "grad_norm": 6.605749607086182, "learning_rate": 1.080478704812196e-06, "loss": 0.5472, "step": 10243 }, { "epoch": 1.49, "grad_norm": 6.234044075012207, "learning_rate": 1.0803209346411546e-06, "loss": 0.5171, "step": 10244 }, { "epoch": 1.49, "grad_norm": 7.409205436706543, "learning_rate": 1.0801631624578027e-06, "loss": 0.584, "step": 10245 }, { "epoch": 1.49, "grad_norm": 6.1654534339904785, "learning_rate": 1.080005388266093e-06, "loss": 0.5495, "step": 10246 }, { "epoch": 1.49, "grad_norm": 7.147002220153809, "learning_rate": 1.0798476120699782e-06, "loss": 0.6026, "step": 10247 }, { "epoch": 1.49, "grad_norm": 5.987020969390869, "learning_rate": 1.0796898338734115e-06, "loss": 0.4905, "step": 10248 }, { "epoch": 1.49, "grad_norm": 6.482719898223877, "learning_rate": 1.0795320536803452e-06, "loss": 0.5945, "step": 10249 }, { "epoch": 1.49, "grad_norm": 6.798208236694336, "learning_rate": 1.0793742714947324e-06, "loss": 0.5379, "step": 10250 }, { "epoch": 1.49, "grad_norm": 5.977337837219238, "learning_rate": 1.0792164873205265e-06, "loss": 0.5206, "step": 10251 }, { "epoch": 1.49, "grad_norm": 8.439550399780273, "learning_rate": 1.0790587011616801e-06, "loss": 0.6394, "step": 10252 }, { "epoch": 1.49, "grad_norm": 5.944823741912842, "learning_rate": 1.0789009130221465e-06, "loss": 0.4842, "step": 10253 }, { "epoch": 1.49, "grad_norm": 6.345170021057129, "learning_rate": 1.0787431229058785e-06, "loss": 0.5536, "step": 10254 }, { "epoch": 1.49, "grad_norm": 7.125339984893799, "learning_rate": 1.0785853308168297e-06, "loss": 0.5643, "step": 10255 }, { "epoch": 1.49, "grad_norm": 6.438746452331543, "learning_rate": 1.0784275367589532e-06, "loss": 0.5002, "step": 10256 }, { "epoch": 1.49, "grad_norm": 6.204833030700684, "learning_rate": 1.0782697407362021e-06, "loss": 0.5421, "step": 10257 }, { "epoch": 1.49, "grad_norm": 6.2170329093933105, "learning_rate": 1.0781119427525297e-06, "loss": 0.5606, "step": 10258 }, { "epoch": 1.49, "grad_norm": 6.597475051879883, "learning_rate": 1.0779541428118897e-06, "loss": 0.5957, "step": 10259 }, { "epoch": 1.49, "grad_norm": 6.602585792541504, "learning_rate": 1.0777963409182351e-06, "loss": 0.5106, "step": 10260 }, { "epoch": 1.49, "grad_norm": 6.862720012664795, "learning_rate": 1.0776385370755198e-06, "loss": 0.5191, "step": 10261 }, { "epoch": 1.49, "grad_norm": 6.519749641418457, "learning_rate": 1.0774807312876968e-06, "loss": 0.5043, "step": 10262 }, { "epoch": 1.49, "grad_norm": 6.17026424407959, "learning_rate": 1.0773229235587202e-06, "loss": 0.5436, "step": 10263 }, { "epoch": 1.49, "grad_norm": 7.133181095123291, "learning_rate": 1.0771651138925434e-06, "loss": 0.5443, "step": 10264 }, { "epoch": 1.49, "grad_norm": 6.336984157562256, "learning_rate": 1.07700730229312e-06, "loss": 0.4828, "step": 10265 }, { "epoch": 1.49, "grad_norm": 6.347104072570801, "learning_rate": 1.0768494887644038e-06, "loss": 0.5564, "step": 10266 }, { "epoch": 1.49, "grad_norm": 6.639007091522217, "learning_rate": 1.0766916733103484e-06, "loss": 0.4989, "step": 10267 }, { "epoch": 1.49, "grad_norm": 6.717399597167969, "learning_rate": 1.0765338559349078e-06, "loss": 0.6213, "step": 10268 }, { "epoch": 1.49, "grad_norm": 6.662511825561523, "learning_rate": 1.0763760366420358e-06, "loss": 0.551, "step": 10269 }, { "epoch": 1.49, "grad_norm": 6.545748710632324, "learning_rate": 1.0762182154356863e-06, "loss": 0.5704, "step": 10270 }, { "epoch": 1.49, "grad_norm": 6.766419887542725, "learning_rate": 1.0760603923198132e-06, "loss": 0.5524, "step": 10271 }, { "epoch": 1.49, "grad_norm": 6.943986892700195, "learning_rate": 1.0759025672983706e-06, "loss": 0.6012, "step": 10272 }, { "epoch": 1.49, "grad_norm": 8.290536880493164, "learning_rate": 1.0757447403753127e-06, "loss": 0.6286, "step": 10273 }, { "epoch": 1.49, "grad_norm": 6.1725993156433105, "learning_rate": 1.075586911554593e-06, "loss": 0.5388, "step": 10274 }, { "epoch": 1.49, "grad_norm": 6.155268669128418, "learning_rate": 1.0754290808401662e-06, "loss": 0.4929, "step": 10275 }, { "epoch": 1.49, "grad_norm": 5.899920463562012, "learning_rate": 1.0752712482359868e-06, "loss": 0.5246, "step": 10276 }, { "epoch": 1.49, "grad_norm": 8.026254653930664, "learning_rate": 1.0751134137460081e-06, "loss": 0.5684, "step": 10277 }, { "epoch": 1.49, "grad_norm": 6.563458442687988, "learning_rate": 1.074955577374185e-06, "loss": 0.4455, "step": 10278 }, { "epoch": 1.49, "grad_norm": 5.9360857009887695, "learning_rate": 1.0747977391244715e-06, "loss": 0.5211, "step": 10279 }, { "epoch": 1.49, "grad_norm": 7.451582431793213, "learning_rate": 1.0746398990008223e-06, "loss": 0.5934, "step": 10280 }, { "epoch": 1.49, "grad_norm": 6.661538600921631, "learning_rate": 1.0744820570071918e-06, "loss": 0.5682, "step": 10281 }, { "epoch": 1.49, "grad_norm": 5.870413303375244, "learning_rate": 1.0743242131475344e-06, "loss": 0.5148, "step": 10282 }, { "epoch": 1.49, "grad_norm": 7.286141872406006, "learning_rate": 1.0741663674258046e-06, "loss": 0.4933, "step": 10283 }, { "epoch": 1.49, "grad_norm": 7.097833633422852, "learning_rate": 1.0740085198459567e-06, "loss": 0.5821, "step": 10284 }, { "epoch": 1.49, "grad_norm": 7.447466850280762, "learning_rate": 1.0738506704119458e-06, "loss": 0.61, "step": 10285 }, { "epoch": 1.49, "grad_norm": 7.05034875869751, "learning_rate": 1.0736928191277263e-06, "loss": 0.4878, "step": 10286 }, { "epoch": 1.49, "grad_norm": 6.251274585723877, "learning_rate": 1.073534965997253e-06, "loss": 0.5338, "step": 10287 }, { "epoch": 1.49, "grad_norm": 6.986958026885986, "learning_rate": 1.0733771110244808e-06, "loss": 0.5465, "step": 10288 }, { "epoch": 1.49, "grad_norm": 7.5933756828308105, "learning_rate": 1.073219254213364e-06, "loss": 0.5943, "step": 10289 }, { "epoch": 1.49, "grad_norm": 6.786388397216797, "learning_rate": 1.0730613955678579e-06, "loss": 0.5271, "step": 10290 }, { "epoch": 1.49, "grad_norm": 6.577713489532471, "learning_rate": 1.0729035350919174e-06, "loss": 0.4957, "step": 10291 }, { "epoch": 1.49, "grad_norm": 6.418140411376953, "learning_rate": 1.072745672789497e-06, "loss": 0.4953, "step": 10292 }, { "epoch": 1.49, "grad_norm": 6.278169631958008, "learning_rate": 1.072587808664552e-06, "loss": 0.5102, "step": 10293 }, { "epoch": 1.49, "grad_norm": 6.700071334838867, "learning_rate": 1.0724299427210375e-06, "loss": 0.5082, "step": 10294 }, { "epoch": 1.49, "grad_norm": 5.86462926864624, "learning_rate": 1.0722720749629081e-06, "loss": 0.5264, "step": 10295 }, { "epoch": 1.49, "grad_norm": 6.451826572418213, "learning_rate": 1.0721142053941196e-06, "loss": 0.541, "step": 10296 }, { "epoch": 1.49, "grad_norm": 6.706973552703857, "learning_rate": 1.071956334018627e-06, "loss": 0.5087, "step": 10297 }, { "epoch": 1.49, "grad_norm": 7.966065883636475, "learning_rate": 1.0717984608403854e-06, "loss": 0.6858, "step": 10298 }, { "epoch": 1.49, "grad_norm": 6.7133097648620605, "learning_rate": 1.0716405858633498e-06, "loss": 0.5205, "step": 10299 }, { "epoch": 1.49, "grad_norm": 6.739966869354248, "learning_rate": 1.0714827090914758e-06, "loss": 0.555, "step": 10300 }, { "epoch": 1.49, "grad_norm": 6.453762054443359, "learning_rate": 1.0713248305287185e-06, "loss": 0.521, "step": 10301 }, { "epoch": 1.49, "grad_norm": 6.309103965759277, "learning_rate": 1.0711669501790337e-06, "loss": 0.5487, "step": 10302 }, { "epoch": 1.49, "grad_norm": 6.086435317993164, "learning_rate": 1.0710090680463764e-06, "loss": 0.4938, "step": 10303 }, { "epoch": 1.5, "grad_norm": 6.0096330642700195, "learning_rate": 1.0708511841347027e-06, "loss": 0.5082, "step": 10304 }, { "epoch": 1.5, "grad_norm": 6.776187896728516, "learning_rate": 1.0706932984479674e-06, "loss": 0.5331, "step": 10305 }, { "epoch": 1.5, "grad_norm": 6.645298004150391, "learning_rate": 1.0705354109901263e-06, "loss": 0.5539, "step": 10306 }, { "epoch": 1.5, "grad_norm": 5.958103179931641, "learning_rate": 1.0703775217651353e-06, "loss": 0.5105, "step": 10307 }, { "epoch": 1.5, "grad_norm": 5.975673675537109, "learning_rate": 1.0702196307769496e-06, "loss": 0.5483, "step": 10308 }, { "epoch": 1.5, "grad_norm": 6.1004180908203125, "learning_rate": 1.070061738029525e-06, "loss": 0.5282, "step": 10309 }, { "epoch": 1.5, "grad_norm": 6.5999979972839355, "learning_rate": 1.0699038435268176e-06, "loss": 0.6211, "step": 10310 }, { "epoch": 1.5, "grad_norm": 6.923366546630859, "learning_rate": 1.0697459472727834e-06, "loss": 0.5021, "step": 10311 }, { "epoch": 1.5, "grad_norm": 6.326557159423828, "learning_rate": 1.0695880492713773e-06, "loss": 0.508, "step": 10312 }, { "epoch": 1.5, "grad_norm": 6.1066083908081055, "learning_rate": 1.0694301495265558e-06, "loss": 0.5933, "step": 10313 }, { "epoch": 1.5, "grad_norm": 7.492067813873291, "learning_rate": 1.069272248042275e-06, "loss": 0.6997, "step": 10314 }, { "epoch": 1.5, "grad_norm": 6.336199760437012, "learning_rate": 1.0691143448224902e-06, "loss": 0.4772, "step": 10315 }, { "epoch": 1.5, "grad_norm": 6.209884166717529, "learning_rate": 1.0689564398711578e-06, "loss": 0.5378, "step": 10316 }, { "epoch": 1.5, "grad_norm": 6.206302642822266, "learning_rate": 1.068798533192234e-06, "loss": 0.5278, "step": 10317 }, { "epoch": 1.5, "grad_norm": 7.540693759918213, "learning_rate": 1.0686406247896746e-06, "loss": 0.5572, "step": 10318 }, { "epoch": 1.5, "grad_norm": 6.594023704528809, "learning_rate": 1.0684827146674361e-06, "loss": 0.4733, "step": 10319 }, { "epoch": 1.5, "grad_norm": 6.342220783233643, "learning_rate": 1.0683248028294744e-06, "loss": 0.4878, "step": 10320 }, { "epoch": 1.5, "grad_norm": 7.197851657867432, "learning_rate": 1.0681668892797458e-06, "loss": 0.5487, "step": 10321 }, { "epoch": 1.5, "grad_norm": 7.332990646362305, "learning_rate": 1.0680089740222062e-06, "loss": 0.557, "step": 10322 }, { "epoch": 1.5, "grad_norm": 6.282794952392578, "learning_rate": 1.0678510570608123e-06, "loss": 0.5002, "step": 10323 }, { "epoch": 1.5, "grad_norm": 6.914468765258789, "learning_rate": 1.0676931383995207e-06, "loss": 0.6094, "step": 10324 }, { "epoch": 1.5, "grad_norm": 6.610781192779541, "learning_rate": 1.0675352180422877e-06, "loss": 0.5619, "step": 10325 }, { "epoch": 1.5, "grad_norm": 6.212981700897217, "learning_rate": 1.067377295993069e-06, "loss": 0.5518, "step": 10326 }, { "epoch": 1.5, "grad_norm": 6.250131607055664, "learning_rate": 1.067219372255822e-06, "loss": 0.547, "step": 10327 }, { "epoch": 1.5, "grad_norm": 6.79180383682251, "learning_rate": 1.067061446834503e-06, "loss": 0.495, "step": 10328 }, { "epoch": 1.5, "grad_norm": 7.4113688468933105, "learning_rate": 1.0669035197330681e-06, "loss": 0.5203, "step": 10329 }, { "epoch": 1.5, "grad_norm": 6.974905014038086, "learning_rate": 1.0667455909554742e-06, "loss": 0.5373, "step": 10330 }, { "epoch": 1.5, "grad_norm": 6.015565395355225, "learning_rate": 1.066587660505678e-06, "loss": 0.5052, "step": 10331 }, { "epoch": 1.5, "grad_norm": 6.720544338226318, "learning_rate": 1.0664297283876365e-06, "loss": 0.6014, "step": 10332 }, { "epoch": 1.5, "grad_norm": 6.355876922607422, "learning_rate": 1.0662717946053057e-06, "loss": 0.5995, "step": 10333 }, { "epoch": 1.5, "grad_norm": 6.511727809906006, "learning_rate": 1.0661138591626429e-06, "loss": 0.5683, "step": 10334 }, { "epoch": 1.5, "grad_norm": 6.563081741333008, "learning_rate": 1.0659559220636048e-06, "loss": 0.5122, "step": 10335 }, { "epoch": 1.5, "grad_norm": 6.064972877502441, "learning_rate": 1.065797983312148e-06, "loss": 0.5545, "step": 10336 }, { "epoch": 1.5, "grad_norm": 5.629307746887207, "learning_rate": 1.06564004291223e-06, "loss": 0.4939, "step": 10337 }, { "epoch": 1.5, "grad_norm": 7.585522651672363, "learning_rate": 1.065482100867807e-06, "loss": 0.5972, "step": 10338 }, { "epoch": 1.5, "grad_norm": 7.998915672302246, "learning_rate": 1.0653241571828368e-06, "loss": 0.5687, "step": 10339 }, { "epoch": 1.5, "grad_norm": 6.5146284103393555, "learning_rate": 1.065166211861276e-06, "loss": 0.5305, "step": 10340 }, { "epoch": 1.5, "grad_norm": 6.705307483673096, "learning_rate": 1.0650082649070813e-06, "loss": 0.5187, "step": 10341 }, { "epoch": 1.5, "grad_norm": 6.1657304763793945, "learning_rate": 1.0648503163242103e-06, "loss": 0.5176, "step": 10342 }, { "epoch": 1.5, "grad_norm": 8.00501537322998, "learning_rate": 1.06469236611662e-06, "loss": 0.5893, "step": 10343 }, { "epoch": 1.5, "grad_norm": 6.376821994781494, "learning_rate": 1.0645344142882674e-06, "loss": 0.5231, "step": 10344 }, { "epoch": 1.5, "grad_norm": 5.9760661125183105, "learning_rate": 1.0643764608431098e-06, "loss": 0.5308, "step": 10345 }, { "epoch": 1.5, "grad_norm": 5.895994663238525, "learning_rate": 1.0642185057851047e-06, "loss": 0.5629, "step": 10346 }, { "epoch": 1.5, "grad_norm": 6.5192437171936035, "learning_rate": 1.0640605491182098e-06, "loss": 0.565, "step": 10347 }, { "epoch": 1.5, "grad_norm": 6.451137065887451, "learning_rate": 1.0639025908463813e-06, "loss": 0.4817, "step": 10348 }, { "epoch": 1.5, "grad_norm": 6.458469867706299, "learning_rate": 1.0637446309735773e-06, "loss": 0.5369, "step": 10349 }, { "epoch": 1.5, "grad_norm": 6.319636344909668, "learning_rate": 1.0635866695037552e-06, "loss": 0.5337, "step": 10350 }, { "epoch": 1.5, "grad_norm": 7.116851806640625, "learning_rate": 1.0634287064408724e-06, "loss": 0.6021, "step": 10351 }, { "epoch": 1.5, "grad_norm": 6.342573642730713, "learning_rate": 1.0632707417888868e-06, "loss": 0.5335, "step": 10352 }, { "epoch": 1.5, "grad_norm": 6.27859354019165, "learning_rate": 1.0631127755517553e-06, "loss": 0.4538, "step": 10353 }, { "epoch": 1.5, "grad_norm": 6.262411594390869, "learning_rate": 1.0629548077334358e-06, "loss": 0.5276, "step": 10354 }, { "epoch": 1.5, "grad_norm": 5.970278739929199, "learning_rate": 1.0627968383378859e-06, "loss": 0.6087, "step": 10355 }, { "epoch": 1.5, "grad_norm": 6.5185394287109375, "learning_rate": 1.062638867369063e-06, "loss": 0.514, "step": 10356 }, { "epoch": 1.5, "grad_norm": 6.7096781730651855, "learning_rate": 1.0624808948309253e-06, "loss": 0.5907, "step": 10357 }, { "epoch": 1.5, "grad_norm": 7.57647705078125, "learning_rate": 1.0623229207274303e-06, "loss": 0.6406, "step": 10358 }, { "epoch": 1.5, "grad_norm": 6.365177631378174, "learning_rate": 1.0621649450625356e-06, "loss": 0.601, "step": 10359 }, { "epoch": 1.5, "grad_norm": 5.8877644538879395, "learning_rate": 1.0620069678401994e-06, "loss": 0.457, "step": 10360 }, { "epoch": 1.5, "grad_norm": 6.646810531616211, "learning_rate": 1.0618489890643795e-06, "loss": 0.5354, "step": 10361 }, { "epoch": 1.5, "grad_norm": 6.43342399597168, "learning_rate": 1.0616910087390334e-06, "loss": 0.5275, "step": 10362 }, { "epoch": 1.5, "grad_norm": 7.127610206604004, "learning_rate": 1.0615330268681193e-06, "loss": 0.614, "step": 10363 }, { "epoch": 1.5, "grad_norm": 6.800400733947754, "learning_rate": 1.0613750434555952e-06, "loss": 0.6351, "step": 10364 }, { "epoch": 1.5, "grad_norm": 7.155236721038818, "learning_rate": 1.0612170585054192e-06, "loss": 0.6182, "step": 10365 }, { "epoch": 1.5, "grad_norm": 6.626527309417725, "learning_rate": 1.0610590720215492e-06, "loss": 0.5371, "step": 10366 }, { "epoch": 1.5, "grad_norm": 5.9044508934021, "learning_rate": 1.0609010840079435e-06, "loss": 0.51, "step": 10367 }, { "epoch": 1.5, "grad_norm": 6.486735820770264, "learning_rate": 1.06074309446856e-06, "loss": 0.5186, "step": 10368 }, { "epoch": 1.5, "grad_norm": 6.706589221954346, "learning_rate": 1.060585103407357e-06, "loss": 0.5382, "step": 10369 }, { "epoch": 1.5, "grad_norm": 6.652522563934326, "learning_rate": 1.0604271108282927e-06, "loss": 0.5632, "step": 10370 }, { "epoch": 1.5, "grad_norm": 6.428364276885986, "learning_rate": 1.0602691167353252e-06, "loss": 0.5346, "step": 10371 }, { "epoch": 1.5, "grad_norm": 6.114902973175049, "learning_rate": 1.0601111211324129e-06, "loss": 0.5498, "step": 10372 }, { "epoch": 1.51, "grad_norm": 7.008514881134033, "learning_rate": 1.0599531240235143e-06, "loss": 0.5492, "step": 10373 }, { "epoch": 1.51, "grad_norm": 6.807688236236572, "learning_rate": 1.0597951254125874e-06, "loss": 0.5737, "step": 10374 }, { "epoch": 1.51, "grad_norm": 6.514030933380127, "learning_rate": 1.0596371253035909e-06, "loss": 0.5306, "step": 10375 }, { "epoch": 1.51, "grad_norm": 6.461719989776611, "learning_rate": 1.0594791237004833e-06, "loss": 0.4857, "step": 10376 }, { "epoch": 1.51, "grad_norm": 6.5922322273254395, "learning_rate": 1.0593211206072226e-06, "loss": 0.5608, "step": 10377 }, { "epoch": 1.51, "grad_norm": 6.777355194091797, "learning_rate": 1.0591631160277676e-06, "loss": 0.5598, "step": 10378 }, { "epoch": 1.51, "grad_norm": 6.688857078552246, "learning_rate": 1.059005109966077e-06, "loss": 0.5468, "step": 10379 }, { "epoch": 1.51, "grad_norm": 6.4602179527282715, "learning_rate": 1.0588471024261094e-06, "loss": 0.5408, "step": 10380 }, { "epoch": 1.51, "grad_norm": 6.3138227462768555, "learning_rate": 1.058689093411823e-06, "loss": 0.4971, "step": 10381 }, { "epoch": 1.51, "grad_norm": 6.321013927459717, "learning_rate": 1.0585310829271769e-06, "loss": 0.4817, "step": 10382 }, { "epoch": 1.51, "grad_norm": 5.799592971801758, "learning_rate": 1.0583730709761293e-06, "loss": 0.4608, "step": 10383 }, { "epoch": 1.51, "grad_norm": 5.954801559448242, "learning_rate": 1.0582150575626397e-06, "loss": 0.5715, "step": 10384 }, { "epoch": 1.51, "grad_norm": 6.196135520935059, "learning_rate": 1.0580570426906661e-06, "loss": 0.5444, "step": 10385 }, { "epoch": 1.51, "grad_norm": 6.118124485015869, "learning_rate": 1.0578990263641678e-06, "loss": 0.5903, "step": 10386 }, { "epoch": 1.51, "grad_norm": 5.7069244384765625, "learning_rate": 1.0577410085871033e-06, "loss": 0.4768, "step": 10387 }, { "epoch": 1.51, "grad_norm": 6.725386142730713, "learning_rate": 1.057582989363432e-06, "loss": 0.5242, "step": 10388 }, { "epoch": 1.51, "grad_norm": 6.631172180175781, "learning_rate": 1.0574249686971121e-06, "loss": 0.5967, "step": 10389 }, { "epoch": 1.51, "grad_norm": 6.847554683685303, "learning_rate": 1.0572669465921031e-06, "loss": 0.5332, "step": 10390 }, { "epoch": 1.51, "grad_norm": 5.9990081787109375, "learning_rate": 1.0571089230523637e-06, "loss": 0.5143, "step": 10391 }, { "epoch": 1.51, "grad_norm": 6.031048774719238, "learning_rate": 1.0569508980818532e-06, "loss": 0.555, "step": 10392 }, { "epoch": 1.51, "grad_norm": 7.352614402770996, "learning_rate": 1.0567928716845302e-06, "loss": 0.5814, "step": 10393 }, { "epoch": 1.51, "grad_norm": 6.22437047958374, "learning_rate": 1.0566348438643542e-06, "loss": 0.5888, "step": 10394 }, { "epoch": 1.51, "grad_norm": 6.9483466148376465, "learning_rate": 1.0564768146252845e-06, "loss": 0.5882, "step": 10395 }, { "epoch": 1.51, "grad_norm": 6.220613479614258, "learning_rate": 1.0563187839712796e-06, "loss": 0.5714, "step": 10396 }, { "epoch": 1.51, "grad_norm": 6.988152027130127, "learning_rate": 1.0561607519062992e-06, "loss": 0.5188, "step": 10397 }, { "epoch": 1.51, "grad_norm": 7.137022018432617, "learning_rate": 1.0560027184343024e-06, "loss": 0.5701, "step": 10398 }, { "epoch": 1.51, "grad_norm": 6.680652141571045, "learning_rate": 1.0558446835592485e-06, "loss": 0.584, "step": 10399 }, { "epoch": 1.51, "grad_norm": 6.380535125732422, "learning_rate": 1.0556866472850965e-06, "loss": 0.5108, "step": 10400 }, { "epoch": 1.51, "grad_norm": 7.370843410491943, "learning_rate": 1.0555286096158065e-06, "loss": 0.5167, "step": 10401 }, { "epoch": 1.51, "grad_norm": 6.1924662590026855, "learning_rate": 1.055370570555337e-06, "loss": 0.5112, "step": 10402 }, { "epoch": 1.51, "grad_norm": 6.954871654510498, "learning_rate": 1.0552125301076481e-06, "loss": 0.5487, "step": 10403 }, { "epoch": 1.51, "grad_norm": 6.972987651824951, "learning_rate": 1.055054488276699e-06, "loss": 0.5803, "step": 10404 }, { "epoch": 1.51, "grad_norm": 6.621455192565918, "learning_rate": 1.054896445066449e-06, "loss": 0.5346, "step": 10405 }, { "epoch": 1.51, "grad_norm": 6.5946431159973145, "learning_rate": 1.0547384004808576e-06, "loss": 0.5543, "step": 10406 }, { "epoch": 1.51, "grad_norm": 6.685993194580078, "learning_rate": 1.054580354523885e-06, "loss": 0.589, "step": 10407 }, { "epoch": 1.51, "grad_norm": 6.14516019821167, "learning_rate": 1.0544223071994902e-06, "loss": 0.5285, "step": 10408 }, { "epoch": 1.51, "grad_norm": 6.524653911590576, "learning_rate": 1.0542642585116326e-06, "loss": 0.5143, "step": 10409 }, { "epoch": 1.51, "grad_norm": 6.483651161193848, "learning_rate": 1.0541062084642724e-06, "loss": 0.4961, "step": 10410 }, { "epoch": 1.51, "grad_norm": 7.3756585121154785, "learning_rate": 1.053948157061369e-06, "loss": 0.5401, "step": 10411 }, { "epoch": 1.51, "grad_norm": 6.208105564117432, "learning_rate": 1.0537901043068822e-06, "loss": 0.5149, "step": 10412 }, { "epoch": 1.51, "grad_norm": 6.1369242668151855, "learning_rate": 1.0536320502047714e-06, "loss": 0.4965, "step": 10413 }, { "epoch": 1.51, "grad_norm": 6.220431804656982, "learning_rate": 1.0534739947589973e-06, "loss": 0.4999, "step": 10414 }, { "epoch": 1.51, "grad_norm": 6.56019926071167, "learning_rate": 1.0533159379735187e-06, "loss": 0.482, "step": 10415 }, { "epoch": 1.51, "grad_norm": 6.454026222229004, "learning_rate": 1.0531578798522963e-06, "loss": 0.5094, "step": 10416 }, { "epoch": 1.51, "grad_norm": 6.451772689819336, "learning_rate": 1.0529998203992893e-06, "loss": 0.5296, "step": 10417 }, { "epoch": 1.51, "grad_norm": 6.270920276641846, "learning_rate": 1.0528417596184582e-06, "loss": 0.5461, "step": 10418 }, { "epoch": 1.51, "grad_norm": 6.861855506896973, "learning_rate": 1.0526836975137623e-06, "loss": 0.5834, "step": 10419 }, { "epoch": 1.51, "grad_norm": 6.041746139526367, "learning_rate": 1.0525256340891622e-06, "loss": 0.513, "step": 10420 }, { "epoch": 1.51, "grad_norm": 6.25014591217041, "learning_rate": 1.0523675693486177e-06, "loss": 0.5311, "step": 10421 }, { "epoch": 1.51, "grad_norm": 7.385502815246582, "learning_rate": 1.0522095032960888e-06, "loss": 0.6301, "step": 10422 }, { "epoch": 1.51, "grad_norm": 6.795492649078369, "learning_rate": 1.0520514359355358e-06, "loss": 0.5669, "step": 10423 }, { "epoch": 1.51, "grad_norm": 7.202225208282471, "learning_rate": 1.0518933672709182e-06, "loss": 0.5228, "step": 10424 }, { "epoch": 1.51, "grad_norm": 7.164097785949707, "learning_rate": 1.0517352973061968e-06, "loss": 0.6539, "step": 10425 }, { "epoch": 1.51, "grad_norm": 5.9312334060668945, "learning_rate": 1.0515772260453313e-06, "loss": 0.526, "step": 10426 }, { "epoch": 1.51, "grad_norm": 5.73134183883667, "learning_rate": 1.0514191534922826e-06, "loss": 0.5111, "step": 10427 }, { "epoch": 1.51, "grad_norm": 6.785139083862305, "learning_rate": 1.0512610796510104e-06, "loss": 0.6024, "step": 10428 }, { "epoch": 1.51, "grad_norm": 6.981735706329346, "learning_rate": 1.0511030045254754e-06, "loss": 0.5356, "step": 10429 }, { "epoch": 1.51, "grad_norm": 6.882254600524902, "learning_rate": 1.0509449281196374e-06, "loss": 0.6, "step": 10430 }, { "epoch": 1.51, "grad_norm": 6.514306545257568, "learning_rate": 1.050786850437457e-06, "loss": 0.554, "step": 10431 }, { "epoch": 1.51, "grad_norm": 6.775106430053711, "learning_rate": 1.0506287714828948e-06, "loss": 0.531, "step": 10432 }, { "epoch": 1.51, "grad_norm": 6.509831428527832, "learning_rate": 1.0504706912599108e-06, "loss": 0.541, "step": 10433 }, { "epoch": 1.51, "grad_norm": 6.497594833374023, "learning_rate": 1.0503126097724657e-06, "loss": 0.5508, "step": 10434 }, { "epoch": 1.51, "grad_norm": 6.381799221038818, "learning_rate": 1.0501545270245197e-06, "loss": 0.5644, "step": 10435 }, { "epoch": 1.51, "grad_norm": 5.992809772491455, "learning_rate": 1.049996443020034e-06, "loss": 0.5112, "step": 10436 }, { "epoch": 1.51, "grad_norm": 6.334560871124268, "learning_rate": 1.0498383577629683e-06, "loss": 0.5407, "step": 10437 }, { "epoch": 1.51, "grad_norm": 7.841468334197998, "learning_rate": 1.0496802712572837e-06, "loss": 0.6762, "step": 10438 }, { "epoch": 1.51, "grad_norm": 6.547170639038086, "learning_rate": 1.0495221835069405e-06, "loss": 0.5774, "step": 10439 }, { "epoch": 1.51, "grad_norm": 7.018162727355957, "learning_rate": 1.0493640945158994e-06, "loss": 0.5262, "step": 10440 }, { "epoch": 1.51, "grad_norm": 5.912220478057861, "learning_rate": 1.0492060042881212e-06, "loss": 0.4824, "step": 10441 }, { "epoch": 1.52, "grad_norm": 6.474428176879883, "learning_rate": 1.0490479128275664e-06, "loss": 0.5634, "step": 10442 }, { "epoch": 1.52, "grad_norm": 6.630337715148926, "learning_rate": 1.0488898201381963e-06, "loss": 0.5216, "step": 10443 }, { "epoch": 1.52, "grad_norm": 5.901980400085449, "learning_rate": 1.0487317262239709e-06, "loss": 0.4908, "step": 10444 }, { "epoch": 1.52, "grad_norm": 6.156266212463379, "learning_rate": 1.0485736310888512e-06, "loss": 0.6019, "step": 10445 }, { "epoch": 1.52, "grad_norm": 7.83952522277832, "learning_rate": 1.048415534736798e-06, "loss": 0.5967, "step": 10446 }, { "epoch": 1.52, "grad_norm": 6.693974494934082, "learning_rate": 1.0482574371717722e-06, "loss": 0.5681, "step": 10447 }, { "epoch": 1.52, "grad_norm": 6.269360065460205, "learning_rate": 1.048099338397735e-06, "loss": 0.5413, "step": 10448 }, { "epoch": 1.52, "grad_norm": 7.123952865600586, "learning_rate": 1.047941238418647e-06, "loss": 0.5944, "step": 10449 }, { "epoch": 1.52, "grad_norm": 6.088318347930908, "learning_rate": 1.047783137238469e-06, "loss": 0.4725, "step": 10450 }, { "epoch": 1.52, "grad_norm": 7.080955982208252, "learning_rate": 1.0476250348611624e-06, "loss": 0.5756, "step": 10451 }, { "epoch": 1.52, "grad_norm": 6.558370590209961, "learning_rate": 1.0474669312906877e-06, "loss": 0.5677, "step": 10452 }, { "epoch": 1.52, "grad_norm": 7.563257217407227, "learning_rate": 1.047308826531006e-06, "loss": 0.5699, "step": 10453 }, { "epoch": 1.52, "grad_norm": 7.738017559051514, "learning_rate": 1.0471507205860787e-06, "loss": 0.5613, "step": 10454 }, { "epoch": 1.52, "grad_norm": 6.373143196105957, "learning_rate": 1.0469926134598668e-06, "loss": 0.5764, "step": 10455 }, { "epoch": 1.52, "grad_norm": 6.549372673034668, "learning_rate": 1.0468345051563311e-06, "loss": 0.5609, "step": 10456 }, { "epoch": 1.52, "grad_norm": 6.658751964569092, "learning_rate": 1.0466763956794336e-06, "loss": 0.5386, "step": 10457 }, { "epoch": 1.52, "grad_norm": 6.8748016357421875, "learning_rate": 1.0465182850331342e-06, "loss": 0.5569, "step": 10458 }, { "epoch": 1.52, "grad_norm": 6.890074253082275, "learning_rate": 1.0463601732213948e-06, "loss": 0.5575, "step": 10459 }, { "epoch": 1.52, "grad_norm": 6.730862617492676, "learning_rate": 1.046202060248177e-06, "loss": 0.5716, "step": 10460 }, { "epoch": 1.52, "grad_norm": 6.034798622131348, "learning_rate": 1.0460439461174413e-06, "loss": 0.5114, "step": 10461 }, { "epoch": 1.52, "grad_norm": 7.225240707397461, "learning_rate": 1.0458858308331493e-06, "loss": 0.4844, "step": 10462 }, { "epoch": 1.52, "grad_norm": 6.291625499725342, "learning_rate": 1.0457277143992625e-06, "loss": 0.5432, "step": 10463 }, { "epoch": 1.52, "grad_norm": 7.114438056945801, "learning_rate": 1.0455695968197422e-06, "loss": 0.5381, "step": 10464 }, { "epoch": 1.52, "grad_norm": 6.458446025848389, "learning_rate": 1.0454114780985496e-06, "loss": 0.5532, "step": 10465 }, { "epoch": 1.52, "grad_norm": 7.2231011390686035, "learning_rate": 1.0452533582396463e-06, "loss": 0.6091, "step": 10466 }, { "epoch": 1.52, "grad_norm": 6.754128456115723, "learning_rate": 1.0450952372469935e-06, "loss": 0.5452, "step": 10467 }, { "epoch": 1.52, "grad_norm": 6.285868167877197, "learning_rate": 1.0449371151245528e-06, "loss": 0.5316, "step": 10468 }, { "epoch": 1.52, "grad_norm": 5.983760356903076, "learning_rate": 1.0447789918762857e-06, "loss": 0.4634, "step": 10469 }, { "epoch": 1.52, "grad_norm": 6.468207836151123, "learning_rate": 1.0446208675061538e-06, "loss": 0.5096, "step": 10470 }, { "epoch": 1.52, "grad_norm": 6.373383045196533, "learning_rate": 1.0444627420181187e-06, "loss": 0.5571, "step": 10471 }, { "epoch": 1.52, "grad_norm": 6.240269184112549, "learning_rate": 1.0443046154161416e-06, "loss": 0.5194, "step": 10472 }, { "epoch": 1.52, "grad_norm": 6.6231913566589355, "learning_rate": 1.0441464877041843e-06, "loss": 0.5644, "step": 10473 }, { "epoch": 1.52, "grad_norm": 6.518470764160156, "learning_rate": 1.0439883588862086e-06, "loss": 0.5531, "step": 10474 }, { "epoch": 1.52, "grad_norm": 6.884676933288574, "learning_rate": 1.043830228966176e-06, "loss": 0.5705, "step": 10475 }, { "epoch": 1.52, "grad_norm": 6.356113433837891, "learning_rate": 1.0436720979480484e-06, "loss": 0.5247, "step": 10476 }, { "epoch": 1.52, "grad_norm": 7.013789653778076, "learning_rate": 1.0435139658357873e-06, "loss": 0.5567, "step": 10477 }, { "epoch": 1.52, "grad_norm": 7.887523651123047, "learning_rate": 1.0433558326333542e-06, "loss": 0.5974, "step": 10478 }, { "epoch": 1.52, "grad_norm": 6.634361267089844, "learning_rate": 1.0431976983447112e-06, "loss": 0.5325, "step": 10479 }, { "epoch": 1.52, "grad_norm": 6.203321933746338, "learning_rate": 1.0430395629738204e-06, "loss": 0.5399, "step": 10480 }, { "epoch": 1.52, "grad_norm": 6.604330062866211, "learning_rate": 1.042881426524643e-06, "loss": 0.5959, "step": 10481 }, { "epoch": 1.52, "grad_norm": 6.9402947425842285, "learning_rate": 1.0427232890011412e-06, "loss": 0.5687, "step": 10482 }, { "epoch": 1.52, "grad_norm": 6.2647857666015625, "learning_rate": 1.0425651504072767e-06, "loss": 0.51, "step": 10483 }, { "epoch": 1.52, "grad_norm": 8.710233688354492, "learning_rate": 1.0424070107470117e-06, "loss": 0.6013, "step": 10484 }, { "epoch": 1.52, "grad_norm": 5.962291240692139, "learning_rate": 1.0422488700243081e-06, "loss": 0.5176, "step": 10485 }, { "epoch": 1.52, "grad_norm": 6.697896480560303, "learning_rate": 1.0420907282431277e-06, "loss": 0.5451, "step": 10486 }, { "epoch": 1.52, "grad_norm": 6.29603385925293, "learning_rate": 1.041932585407432e-06, "loss": 0.5307, "step": 10487 }, { "epoch": 1.52, "grad_norm": 6.539132595062256, "learning_rate": 1.041774441521184e-06, "loss": 0.5812, "step": 10488 }, { "epoch": 1.52, "grad_norm": 6.585265636444092, "learning_rate": 1.041616296588345e-06, "loss": 0.4883, "step": 10489 }, { "epoch": 1.52, "grad_norm": 6.8372368812561035, "learning_rate": 1.0414581506128774e-06, "loss": 0.5724, "step": 10490 }, { "epoch": 1.52, "grad_norm": 6.174432277679443, "learning_rate": 1.0413000035987432e-06, "loss": 0.4539, "step": 10491 }, { "epoch": 1.52, "grad_norm": 6.882954120635986, "learning_rate": 1.0411418555499048e-06, "loss": 0.5419, "step": 10492 }, { "epoch": 1.52, "grad_norm": 6.5499467849731445, "learning_rate": 1.0409837064703236e-06, "loss": 0.4505, "step": 10493 }, { "epoch": 1.52, "grad_norm": 6.055861949920654, "learning_rate": 1.0408255563639622e-06, "loss": 0.5606, "step": 10494 }, { "epoch": 1.52, "grad_norm": 6.078381061553955, "learning_rate": 1.040667405234783e-06, "loss": 0.5105, "step": 10495 }, { "epoch": 1.52, "grad_norm": 7.057531356811523, "learning_rate": 1.040509253086748e-06, "loss": 0.5609, "step": 10496 }, { "epoch": 1.52, "grad_norm": 6.853118896484375, "learning_rate": 1.0403510999238196e-06, "loss": 0.5583, "step": 10497 }, { "epoch": 1.52, "grad_norm": 7.000903129577637, "learning_rate": 1.04019294574996e-06, "loss": 0.5774, "step": 10498 }, { "epoch": 1.52, "grad_norm": 6.565121173858643, "learning_rate": 1.0400347905691314e-06, "loss": 0.5104, "step": 10499 }, { "epoch": 1.52, "grad_norm": 6.897328853607178, "learning_rate": 1.039876634385296e-06, "loss": 0.5926, "step": 10500 }, { "epoch": 1.52, "grad_norm": 6.52175760269165, "learning_rate": 1.0397184772024163e-06, "loss": 0.5317, "step": 10501 }, { "epoch": 1.52, "grad_norm": 7.713136672973633, "learning_rate": 1.0395603190244548e-06, "loss": 0.4965, "step": 10502 }, { "epoch": 1.52, "grad_norm": 6.990836143493652, "learning_rate": 1.039402159855374e-06, "loss": 0.5968, "step": 10503 }, { "epoch": 1.52, "grad_norm": 6.33839225769043, "learning_rate": 1.0392439996991355e-06, "loss": 0.5523, "step": 10504 }, { "epoch": 1.52, "grad_norm": 6.269486427307129, "learning_rate": 1.039085838559703e-06, "loss": 0.4644, "step": 10505 }, { "epoch": 1.52, "grad_norm": 6.919437885284424, "learning_rate": 1.038927676441038e-06, "loss": 0.5398, "step": 10506 }, { "epoch": 1.52, "grad_norm": 6.607359886169434, "learning_rate": 1.0387695133471033e-06, "loss": 0.5517, "step": 10507 }, { "epoch": 1.52, "grad_norm": 6.072635650634766, "learning_rate": 1.0386113492818615e-06, "loss": 0.5597, "step": 10508 }, { "epoch": 1.52, "grad_norm": 6.329777717590332, "learning_rate": 1.0384531842492751e-06, "loss": 0.5001, "step": 10509 }, { "epoch": 1.52, "grad_norm": 6.6203694343566895, "learning_rate": 1.0382950182533065e-06, "loss": 0.5702, "step": 10510 }, { "epoch": 1.53, "grad_norm": 6.104700088500977, "learning_rate": 1.0381368512979187e-06, "loss": 0.5648, "step": 10511 }, { "epoch": 1.53, "grad_norm": 6.262964248657227, "learning_rate": 1.037978683387074e-06, "loss": 0.5562, "step": 10512 }, { "epoch": 1.53, "grad_norm": 6.26474142074585, "learning_rate": 1.0378205145247351e-06, "loss": 0.5884, "step": 10513 }, { "epoch": 1.53, "grad_norm": 6.969522953033447, "learning_rate": 1.0376623447148647e-06, "loss": 0.5284, "step": 10514 }, { "epoch": 1.53, "grad_norm": 5.860495090484619, "learning_rate": 1.0375041739614252e-06, "loss": 0.5477, "step": 10515 }, { "epoch": 1.53, "grad_norm": 6.499289512634277, "learning_rate": 1.0373460022683799e-06, "loss": 0.5181, "step": 10516 }, { "epoch": 1.53, "grad_norm": 6.163395404815674, "learning_rate": 1.037187829639691e-06, "loss": 0.5571, "step": 10517 }, { "epoch": 1.53, "grad_norm": 6.773860931396484, "learning_rate": 1.0370296560793212e-06, "loss": 0.5441, "step": 10518 }, { "epoch": 1.53, "grad_norm": 6.9192423820495605, "learning_rate": 1.0368714815912338e-06, "loss": 0.5413, "step": 10519 }, { "epoch": 1.53, "grad_norm": 5.6856560707092285, "learning_rate": 1.0367133061793916e-06, "loss": 0.5064, "step": 10520 }, { "epoch": 1.53, "grad_norm": 6.2401957511901855, "learning_rate": 1.0365551298477569e-06, "loss": 0.5104, "step": 10521 }, { "epoch": 1.53, "grad_norm": 6.676052093505859, "learning_rate": 1.0363969526002928e-06, "loss": 0.5088, "step": 10522 }, { "epoch": 1.53, "grad_norm": 6.373943328857422, "learning_rate": 1.0362387744409621e-06, "loss": 0.5514, "step": 10523 }, { "epoch": 1.53, "grad_norm": 5.828042507171631, "learning_rate": 1.036080595373728e-06, "loss": 0.4851, "step": 10524 }, { "epoch": 1.53, "grad_norm": 6.314889430999756, "learning_rate": 1.035922415402553e-06, "loss": 0.6154, "step": 10525 }, { "epoch": 1.53, "grad_norm": 6.825951099395752, "learning_rate": 1.0357642345314005e-06, "loss": 0.4846, "step": 10526 }, { "epoch": 1.53, "grad_norm": 6.452639579772949, "learning_rate": 1.0356060527642332e-06, "loss": 0.53, "step": 10527 }, { "epoch": 1.53, "grad_norm": 5.982109546661377, "learning_rate": 1.0354478701050137e-06, "loss": 0.5838, "step": 10528 }, { "epoch": 1.53, "grad_norm": 6.875734806060791, "learning_rate": 1.035289686557706e-06, "loss": 0.482, "step": 10529 }, { "epoch": 1.53, "grad_norm": 6.510693073272705, "learning_rate": 1.035131502126272e-06, "loss": 0.5151, "step": 10530 }, { "epoch": 1.53, "grad_norm": 6.664001941680908, "learning_rate": 1.0349733168146753e-06, "loss": 0.5208, "step": 10531 }, { "epoch": 1.53, "grad_norm": 6.803639888763428, "learning_rate": 1.034815130626879e-06, "loss": 0.5482, "step": 10532 }, { "epoch": 1.53, "grad_norm": 6.085157871246338, "learning_rate": 1.0346569435668467e-06, "loss": 0.5506, "step": 10533 }, { "epoch": 1.53, "grad_norm": 7.0265092849731445, "learning_rate": 1.0344987556385407e-06, "loss": 0.4713, "step": 10534 }, { "epoch": 1.53, "grad_norm": 6.352632522583008, "learning_rate": 1.0343405668459244e-06, "loss": 0.4577, "step": 10535 }, { "epoch": 1.53, "grad_norm": 6.693734169006348, "learning_rate": 1.034182377192961e-06, "loss": 0.4948, "step": 10536 }, { "epoch": 1.53, "grad_norm": 5.91874361038208, "learning_rate": 1.0340241866836134e-06, "loss": 0.5273, "step": 10537 }, { "epoch": 1.53, "grad_norm": 6.2408342361450195, "learning_rate": 1.0338659953218454e-06, "loss": 0.4948, "step": 10538 }, { "epoch": 1.53, "grad_norm": 6.713253498077393, "learning_rate": 1.03370780311162e-06, "loss": 0.6019, "step": 10539 }, { "epoch": 1.53, "grad_norm": 6.2867536544799805, "learning_rate": 1.0335496100569004e-06, "loss": 0.5178, "step": 10540 }, { "epoch": 1.53, "grad_norm": 5.8425984382629395, "learning_rate": 1.0333914161616496e-06, "loss": 0.5192, "step": 10541 }, { "epoch": 1.53, "grad_norm": 7.475887298583984, "learning_rate": 1.0332332214298311e-06, "loss": 0.6581, "step": 10542 }, { "epoch": 1.53, "grad_norm": 5.867544174194336, "learning_rate": 1.033075025865408e-06, "loss": 0.5188, "step": 10543 }, { "epoch": 1.53, "grad_norm": 8.460759162902832, "learning_rate": 1.0329168294723446e-06, "loss": 0.619, "step": 10544 }, { "epoch": 1.53, "grad_norm": 5.928850173950195, "learning_rate": 1.0327586322546028e-06, "loss": 0.4247, "step": 10545 }, { "epoch": 1.53, "grad_norm": 6.173043251037598, "learning_rate": 1.032600434216147e-06, "loss": 0.5067, "step": 10546 }, { "epoch": 1.53, "grad_norm": 6.121676445007324, "learning_rate": 1.0324422353609403e-06, "loss": 0.5518, "step": 10547 }, { "epoch": 1.53, "grad_norm": 6.276890277862549, "learning_rate": 1.0322840356929463e-06, "loss": 0.4884, "step": 10548 }, { "epoch": 1.53, "grad_norm": 6.171991348266602, "learning_rate": 1.0321258352161281e-06, "loss": 0.5536, "step": 10549 }, { "epoch": 1.53, "grad_norm": 6.212228298187256, "learning_rate": 1.0319676339344491e-06, "loss": 0.5263, "step": 10550 }, { "epoch": 1.53, "grad_norm": 5.891904354095459, "learning_rate": 1.0318094318518733e-06, "loss": 0.4816, "step": 10551 }, { "epoch": 1.53, "grad_norm": 6.5900421142578125, "learning_rate": 1.0316512289723637e-06, "loss": 0.5117, "step": 10552 }, { "epoch": 1.53, "grad_norm": 6.287887096405029, "learning_rate": 1.0314930252998838e-06, "loss": 0.512, "step": 10553 }, { "epoch": 1.53, "grad_norm": 6.641598224639893, "learning_rate": 1.0313348208383973e-06, "loss": 0.551, "step": 10554 }, { "epoch": 1.53, "grad_norm": 6.298679828643799, "learning_rate": 1.0311766155918685e-06, "loss": 0.5347, "step": 10555 }, { "epoch": 1.53, "grad_norm": 6.495449066162109, "learning_rate": 1.0310184095642596e-06, "loss": 0.5093, "step": 10556 }, { "epoch": 1.53, "grad_norm": 7.051272869110107, "learning_rate": 1.0308602027595347e-06, "loss": 0.532, "step": 10557 }, { "epoch": 1.53, "grad_norm": 7.522655963897705, "learning_rate": 1.0307019951816577e-06, "loss": 0.5291, "step": 10558 }, { "epoch": 1.53, "grad_norm": 7.31890869140625, "learning_rate": 1.0305437868345923e-06, "loss": 0.4926, "step": 10559 }, { "epoch": 1.53, "grad_norm": 6.711087703704834, "learning_rate": 1.0303855777223018e-06, "loss": 0.5108, "step": 10560 }, { "epoch": 1.53, "grad_norm": 6.215082168579102, "learning_rate": 1.0302273678487503e-06, "loss": 0.5507, "step": 10561 }, { "epoch": 1.53, "grad_norm": 6.460882663726807, "learning_rate": 1.030069157217901e-06, "loss": 0.5121, "step": 10562 }, { "epoch": 1.53, "grad_norm": 6.181368350982666, "learning_rate": 1.0299109458337179e-06, "loss": 0.4931, "step": 10563 }, { "epoch": 1.53, "grad_norm": 7.416673183441162, "learning_rate": 1.0297527337001645e-06, "loss": 0.6174, "step": 10564 }, { "epoch": 1.53, "grad_norm": 6.73676061630249, "learning_rate": 1.0295945208212047e-06, "loss": 0.5537, "step": 10565 }, { "epoch": 1.53, "grad_norm": 7.251918315887451, "learning_rate": 1.0294363072008022e-06, "loss": 0.587, "step": 10566 }, { "epoch": 1.53, "grad_norm": 6.639589309692383, "learning_rate": 1.0292780928429212e-06, "loss": 0.5007, "step": 10567 }, { "epoch": 1.53, "grad_norm": 5.556299686431885, "learning_rate": 1.0291198777515251e-06, "loss": 0.5289, "step": 10568 }, { "epoch": 1.53, "grad_norm": 7.092350482940674, "learning_rate": 1.0289616619305777e-06, "loss": 0.5312, "step": 10569 }, { "epoch": 1.53, "grad_norm": 6.433780670166016, "learning_rate": 1.028803445384043e-06, "loss": 0.5284, "step": 10570 }, { "epoch": 1.53, "grad_norm": 7.7684478759765625, "learning_rate": 1.0286452281158844e-06, "loss": 0.6278, "step": 10571 }, { "epoch": 1.53, "grad_norm": 6.702802658081055, "learning_rate": 1.0284870101300662e-06, "loss": 0.5876, "step": 10572 }, { "epoch": 1.53, "grad_norm": 6.336124897003174, "learning_rate": 1.0283287914305525e-06, "loss": 0.5654, "step": 10573 }, { "epoch": 1.53, "grad_norm": 6.136504173278809, "learning_rate": 1.028170572021307e-06, "loss": 0.5502, "step": 10574 }, { "epoch": 1.53, "grad_norm": 6.402093410491943, "learning_rate": 1.0280123519062932e-06, "loss": 0.4412, "step": 10575 }, { "epoch": 1.53, "grad_norm": 7.081981658935547, "learning_rate": 1.0278541310894762e-06, "loss": 0.6111, "step": 10576 }, { "epoch": 1.53, "grad_norm": 6.141475200653076, "learning_rate": 1.0276959095748186e-06, "loss": 0.5361, "step": 10577 }, { "epoch": 1.53, "grad_norm": 6.443163871765137, "learning_rate": 1.0275376873662851e-06, "loss": 0.5591, "step": 10578 }, { "epoch": 1.53, "grad_norm": 5.954693794250488, "learning_rate": 1.0273794644678395e-06, "loss": 0.5121, "step": 10579 }, { "epoch": 1.54, "grad_norm": 6.362637519836426, "learning_rate": 1.027221240883446e-06, "loss": 0.5481, "step": 10580 }, { "epoch": 1.54, "grad_norm": 6.666403770446777, "learning_rate": 1.0270630166170686e-06, "loss": 0.4336, "step": 10581 }, { "epoch": 1.54, "grad_norm": 6.598636627197266, "learning_rate": 1.0269047916726712e-06, "loss": 0.5134, "step": 10582 }, { "epoch": 1.54, "grad_norm": 6.5374226570129395, "learning_rate": 1.026746566054218e-06, "loss": 0.482, "step": 10583 }, { "epoch": 1.54, "grad_norm": 6.728602886199951, "learning_rate": 1.0265883397656729e-06, "loss": 0.4733, "step": 10584 }, { "epoch": 1.54, "grad_norm": 6.330026626586914, "learning_rate": 1.0264301128110002e-06, "loss": 0.455, "step": 10585 }, { "epoch": 1.54, "grad_norm": 6.671828269958496, "learning_rate": 1.026271885194164e-06, "loss": 0.498, "step": 10586 }, { "epoch": 1.54, "grad_norm": 6.097837448120117, "learning_rate": 1.0261136569191286e-06, "loss": 0.4933, "step": 10587 }, { "epoch": 1.54, "grad_norm": 6.446673393249512, "learning_rate": 1.025955427989858e-06, "loss": 0.5211, "step": 10588 }, { "epoch": 1.54, "grad_norm": 6.501456260681152, "learning_rate": 1.0257971984103163e-06, "loss": 0.5423, "step": 10589 }, { "epoch": 1.54, "grad_norm": 6.0073113441467285, "learning_rate": 1.0256389681844673e-06, "loss": 0.5036, "step": 10590 }, { "epoch": 1.54, "grad_norm": 6.884528160095215, "learning_rate": 1.025480737316276e-06, "loss": 0.5773, "step": 10591 }, { "epoch": 1.54, "grad_norm": 6.912513732910156, "learning_rate": 1.0253225058097059e-06, "loss": 0.5021, "step": 10592 }, { "epoch": 1.54, "grad_norm": 6.007830619812012, "learning_rate": 1.0251642736687218e-06, "loss": 0.5367, "step": 10593 }, { "epoch": 1.54, "grad_norm": 7.253849506378174, "learning_rate": 1.0250060408972876e-06, "loss": 0.628, "step": 10594 }, { "epoch": 1.54, "grad_norm": 6.214087963104248, "learning_rate": 1.0248478074993679e-06, "loss": 0.5706, "step": 10595 }, { "epoch": 1.54, "grad_norm": 7.486738681793213, "learning_rate": 1.0246895734789267e-06, "loss": 0.6314, "step": 10596 }, { "epoch": 1.54, "grad_norm": 5.887916088104248, "learning_rate": 1.024531338839928e-06, "loss": 0.5769, "step": 10597 }, { "epoch": 1.54, "grad_norm": 6.444683074951172, "learning_rate": 1.024373103586337e-06, "loss": 0.56, "step": 10598 }, { "epoch": 1.54, "grad_norm": 6.008410453796387, "learning_rate": 1.024214867722117e-06, "loss": 0.5176, "step": 10599 }, { "epoch": 1.54, "grad_norm": 6.801053524017334, "learning_rate": 1.0240566312512331e-06, "loss": 0.538, "step": 10600 }, { "epoch": 1.54, "grad_norm": 6.0265679359436035, "learning_rate": 1.0238983941776494e-06, "loss": 0.5289, "step": 10601 }, { "epoch": 1.54, "grad_norm": 6.215209484100342, "learning_rate": 1.0237401565053301e-06, "loss": 0.5804, "step": 10602 }, { "epoch": 1.54, "grad_norm": 6.202282905578613, "learning_rate": 1.0235819182382397e-06, "loss": 0.5636, "step": 10603 }, { "epoch": 1.54, "grad_norm": 5.828154563903809, "learning_rate": 1.023423679380343e-06, "loss": 0.4704, "step": 10604 }, { "epoch": 1.54, "grad_norm": 6.785002708435059, "learning_rate": 1.023265439935604e-06, "loss": 0.5187, "step": 10605 }, { "epoch": 1.54, "grad_norm": 6.64840841293335, "learning_rate": 1.023107199907987e-06, "loss": 0.5762, "step": 10606 }, { "epoch": 1.54, "grad_norm": 6.071714878082275, "learning_rate": 1.0229489593014567e-06, "loss": 0.5611, "step": 10607 }, { "epoch": 1.54, "grad_norm": 6.575584411621094, "learning_rate": 1.0227907181199775e-06, "loss": 0.6264, "step": 10608 }, { "epoch": 1.54, "grad_norm": 5.862648963928223, "learning_rate": 1.022632476367514e-06, "loss": 0.4851, "step": 10609 }, { "epoch": 1.54, "grad_norm": 6.4683098793029785, "learning_rate": 1.0224742340480307e-06, "loss": 0.5467, "step": 10610 }, { "epoch": 1.54, "grad_norm": 6.029024600982666, "learning_rate": 1.0223159911654914e-06, "loss": 0.5438, "step": 10611 }, { "epoch": 1.54, "grad_norm": 5.82688045501709, "learning_rate": 1.0221577477238617e-06, "loss": 0.5939, "step": 10612 }, { "epoch": 1.54, "grad_norm": 6.0246806144714355, "learning_rate": 1.0219995037271055e-06, "loss": 0.4416, "step": 10613 }, { "epoch": 1.54, "grad_norm": 6.2415008544921875, "learning_rate": 1.0218412591791874e-06, "loss": 0.5146, "step": 10614 }, { "epoch": 1.54, "grad_norm": 6.632760047912598, "learning_rate": 1.0216830140840722e-06, "loss": 0.5518, "step": 10615 }, { "epoch": 1.54, "grad_norm": 6.951657772064209, "learning_rate": 1.0215247684457241e-06, "loss": 0.5539, "step": 10616 }, { "epoch": 1.54, "grad_norm": 7.6719279289245605, "learning_rate": 1.0213665222681086e-06, "loss": 0.5015, "step": 10617 }, { "epoch": 1.54, "grad_norm": 6.201049327850342, "learning_rate": 1.021208275555189e-06, "loss": 0.444, "step": 10618 }, { "epoch": 1.54, "grad_norm": 5.942958354949951, "learning_rate": 1.0210500283109304e-06, "loss": 0.5679, "step": 10619 }, { "epoch": 1.54, "grad_norm": 6.872108459472656, "learning_rate": 1.020891780539298e-06, "loss": 0.5297, "step": 10620 }, { "epoch": 1.54, "grad_norm": 7.190117359161377, "learning_rate": 1.0207335322442554e-06, "loss": 0.6158, "step": 10621 }, { "epoch": 1.54, "grad_norm": 6.514769554138184, "learning_rate": 1.020575283429768e-06, "loss": 0.5133, "step": 10622 }, { "epoch": 1.54, "grad_norm": 6.528891563415527, "learning_rate": 1.0204170340998006e-06, "loss": 0.454, "step": 10623 }, { "epoch": 1.54, "grad_norm": 6.12673807144165, "learning_rate": 1.0202587842583177e-06, "loss": 0.548, "step": 10624 }, { "epoch": 1.54, "grad_norm": 5.71082067489624, "learning_rate": 1.0201005339092834e-06, "loss": 0.4995, "step": 10625 }, { "epoch": 1.54, "grad_norm": 6.724814414978027, "learning_rate": 1.019942283056663e-06, "loss": 0.4783, "step": 10626 }, { "epoch": 1.54, "grad_norm": 6.770772457122803, "learning_rate": 1.0197840317044213e-06, "loss": 0.5962, "step": 10627 }, { "epoch": 1.54, "grad_norm": 6.5343756675720215, "learning_rate": 1.0196257798565229e-06, "loss": 0.5209, "step": 10628 }, { "epoch": 1.54, "grad_norm": 6.449734210968018, "learning_rate": 1.0194675275169321e-06, "loss": 0.5423, "step": 10629 }, { "epoch": 1.54, "grad_norm": 6.530101299285889, "learning_rate": 1.0193092746896143e-06, "loss": 0.5266, "step": 10630 }, { "epoch": 1.54, "grad_norm": 6.550050258636475, "learning_rate": 1.0191510213785342e-06, "loss": 0.6023, "step": 10631 }, { "epoch": 1.54, "grad_norm": 6.340219974517822, "learning_rate": 1.018992767587656e-06, "loss": 0.4949, "step": 10632 }, { "epoch": 1.54, "grad_norm": 7.246034622192383, "learning_rate": 1.0188345133209448e-06, "loss": 0.5031, "step": 10633 }, { "epoch": 1.54, "grad_norm": 6.59566068649292, "learning_rate": 1.0186762585823658e-06, "loss": 0.4801, "step": 10634 }, { "epoch": 1.54, "grad_norm": 6.190431594848633, "learning_rate": 1.0185180033758832e-06, "loss": 0.5096, "step": 10635 }, { "epoch": 1.54, "grad_norm": 6.009250640869141, "learning_rate": 1.018359747705462e-06, "loss": 0.5715, "step": 10636 }, { "epoch": 1.54, "grad_norm": 6.122505187988281, "learning_rate": 1.0182014915750677e-06, "loss": 0.4834, "step": 10637 }, { "epoch": 1.54, "grad_norm": 6.5252861976623535, "learning_rate": 1.0180432349886642e-06, "loss": 0.5404, "step": 10638 }, { "epoch": 1.54, "grad_norm": 6.823750019073486, "learning_rate": 1.0178849779502169e-06, "loss": 0.5416, "step": 10639 }, { "epoch": 1.54, "grad_norm": 6.527866840362549, "learning_rate": 1.0177267204636906e-06, "loss": 0.5408, "step": 10640 }, { "epoch": 1.54, "grad_norm": 6.730687141418457, "learning_rate": 1.0175684625330499e-06, "loss": 0.5774, "step": 10641 }, { "epoch": 1.54, "grad_norm": 6.446315765380859, "learning_rate": 1.0174102041622601e-06, "loss": 0.6345, "step": 10642 }, { "epoch": 1.54, "grad_norm": 6.625303268432617, "learning_rate": 1.0172519453552857e-06, "loss": 0.6141, "step": 10643 }, { "epoch": 1.54, "grad_norm": 14.974166870117188, "learning_rate": 1.017093686116092e-06, "loss": 0.5797, "step": 10644 }, { "epoch": 1.54, "grad_norm": 6.581972599029541, "learning_rate": 1.016935426448644e-06, "loss": 0.5114, "step": 10645 }, { "epoch": 1.54, "grad_norm": 6.612091064453125, "learning_rate": 1.0167771663569064e-06, "loss": 0.538, "step": 10646 }, { "epoch": 1.54, "grad_norm": 5.935414791107178, "learning_rate": 1.0166189058448438e-06, "loss": 0.5131, "step": 10647 }, { "epoch": 1.54, "grad_norm": 7.069015979766846, "learning_rate": 1.0164606449164216e-06, "loss": 0.6238, "step": 10648 }, { "epoch": 1.55, "grad_norm": 6.112884044647217, "learning_rate": 1.0163023835756046e-06, "loss": 0.4559, "step": 10649 }, { "epoch": 1.55, "grad_norm": 5.986718654632568, "learning_rate": 1.016144121826358e-06, "loss": 0.4876, "step": 10650 }, { "epoch": 1.55, "grad_norm": 7.130685806274414, "learning_rate": 1.0159858596726468e-06, "loss": 0.5398, "step": 10651 }, { "epoch": 1.55, "grad_norm": 5.9081525802612305, "learning_rate": 1.0158275971184359e-06, "loss": 0.5331, "step": 10652 }, { "epoch": 1.55, "grad_norm": 7.1796770095825195, "learning_rate": 1.01566933416769e-06, "loss": 0.5693, "step": 10653 }, { "epoch": 1.55, "grad_norm": 6.950843334197998, "learning_rate": 1.0155110708243744e-06, "loss": 0.6385, "step": 10654 }, { "epoch": 1.55, "grad_norm": 6.7496185302734375, "learning_rate": 1.0153528070924541e-06, "loss": 0.5381, "step": 10655 }, { "epoch": 1.55, "grad_norm": 6.75897216796875, "learning_rate": 1.0151945429758943e-06, "loss": 0.5265, "step": 10656 }, { "epoch": 1.55, "grad_norm": 6.375042915344238, "learning_rate": 1.01503627847866e-06, "loss": 0.5086, "step": 10657 }, { "epoch": 1.55, "grad_norm": 6.879556655883789, "learning_rate": 1.0148780136047159e-06, "loss": 0.5867, "step": 10658 }, { "epoch": 1.55, "grad_norm": 6.055920600891113, "learning_rate": 1.014719748358028e-06, "loss": 0.4693, "step": 10659 }, { "epoch": 1.55, "grad_norm": 7.48831033706665, "learning_rate": 1.0145614827425601e-06, "loss": 0.5472, "step": 10660 }, { "epoch": 1.55, "grad_norm": 6.7499895095825195, "learning_rate": 1.0144032167622778e-06, "loss": 0.5875, "step": 10661 }, { "epoch": 1.55, "grad_norm": 6.763931751251221, "learning_rate": 1.0142449504211467e-06, "loss": 0.4677, "step": 10662 }, { "epoch": 1.55, "grad_norm": 6.739100456237793, "learning_rate": 1.0140866837231311e-06, "loss": 0.5259, "step": 10663 }, { "epoch": 1.55, "grad_norm": 6.874138355255127, "learning_rate": 1.0139284166721968e-06, "loss": 0.5343, "step": 10664 }, { "epoch": 1.55, "grad_norm": 7.094969749450684, "learning_rate": 1.0137701492723088e-06, "loss": 0.5206, "step": 10665 }, { "epoch": 1.55, "grad_norm": 5.954894542694092, "learning_rate": 1.0136118815274319e-06, "loss": 0.5503, "step": 10666 }, { "epoch": 1.55, "grad_norm": 6.979402542114258, "learning_rate": 1.0134536134415315e-06, "loss": 0.6208, "step": 10667 }, { "epoch": 1.55, "grad_norm": 7.509828567504883, "learning_rate": 1.0132953450185725e-06, "loss": 0.643, "step": 10668 }, { "epoch": 1.55, "grad_norm": 6.622716426849365, "learning_rate": 1.0131370762625204e-06, "loss": 0.5397, "step": 10669 }, { "epoch": 1.55, "grad_norm": 6.36198616027832, "learning_rate": 1.0129788071773403e-06, "loss": 0.5447, "step": 10670 }, { "epoch": 1.55, "grad_norm": 6.300507068634033, "learning_rate": 1.012820537766997e-06, "loss": 0.5672, "step": 10671 }, { "epoch": 1.55, "grad_norm": 6.463855743408203, "learning_rate": 1.0126622680354561e-06, "loss": 0.5261, "step": 10672 }, { "epoch": 1.55, "grad_norm": 6.427636623382568, "learning_rate": 1.0125039979866827e-06, "loss": 0.5654, "step": 10673 }, { "epoch": 1.55, "grad_norm": 6.349562644958496, "learning_rate": 1.012345727624642e-06, "loss": 0.4934, "step": 10674 }, { "epoch": 1.55, "grad_norm": 6.163871765136719, "learning_rate": 1.012187456953299e-06, "loss": 0.4942, "step": 10675 }, { "epoch": 1.55, "grad_norm": 6.365171909332275, "learning_rate": 1.012029185976619e-06, "loss": 0.5013, "step": 10676 }, { "epoch": 1.55, "grad_norm": 6.517433166503906, "learning_rate": 1.0118709146985675e-06, "loss": 0.6179, "step": 10677 }, { "epoch": 1.55, "grad_norm": 7.23824405670166, "learning_rate": 1.0117126431231093e-06, "loss": 0.5192, "step": 10678 }, { "epoch": 1.55, "grad_norm": 6.239743709564209, "learning_rate": 1.01155437125421e-06, "loss": 0.4965, "step": 10679 }, { "epoch": 1.55, "grad_norm": 6.090859889984131, "learning_rate": 1.0113960990958351e-06, "loss": 0.5119, "step": 10680 }, { "epoch": 1.55, "grad_norm": 6.527507781982422, "learning_rate": 1.0112378266519487e-06, "loss": 0.566, "step": 10681 }, { "epoch": 1.55, "grad_norm": 6.754213333129883, "learning_rate": 1.0110795539265172e-06, "loss": 0.5074, "step": 10682 }, { "epoch": 1.55, "grad_norm": 6.325412750244141, "learning_rate": 1.0109212809235055e-06, "loss": 0.5729, "step": 10683 }, { "epoch": 1.55, "grad_norm": 6.102933883666992, "learning_rate": 1.0107630076468787e-06, "loss": 0.5472, "step": 10684 }, { "epoch": 1.55, "grad_norm": 8.501237869262695, "learning_rate": 1.0106047341006022e-06, "loss": 0.5724, "step": 10685 }, { "epoch": 1.55, "grad_norm": 6.309503555297852, "learning_rate": 1.0104464602886416e-06, "loss": 0.5211, "step": 10686 }, { "epoch": 1.55, "grad_norm": 5.930588722229004, "learning_rate": 1.0102881862149617e-06, "loss": 0.4968, "step": 10687 }, { "epoch": 1.55, "grad_norm": 6.29229211807251, "learning_rate": 1.010129911883528e-06, "loss": 0.5073, "step": 10688 }, { "epoch": 1.55, "grad_norm": 6.6798295974731445, "learning_rate": 1.0099716372983057e-06, "loss": 0.5829, "step": 10689 }, { "epoch": 1.55, "grad_norm": 6.093920707702637, "learning_rate": 1.0098133624632606e-06, "loss": 0.5284, "step": 10690 }, { "epoch": 1.55, "grad_norm": 6.24198579788208, "learning_rate": 1.0096550873823573e-06, "loss": 0.5637, "step": 10691 }, { "epoch": 1.55, "grad_norm": 6.97321081161499, "learning_rate": 1.0094968120595618e-06, "loss": 0.6011, "step": 10692 }, { "epoch": 1.55, "grad_norm": 6.934744834899902, "learning_rate": 1.009338536498839e-06, "loss": 0.5552, "step": 10693 }, { "epoch": 1.55, "grad_norm": 6.222344875335693, "learning_rate": 1.0091802607041541e-06, "loss": 0.4843, "step": 10694 }, { "epoch": 1.55, "grad_norm": 6.451713562011719, "learning_rate": 1.009021984679473e-06, "loss": 0.5154, "step": 10695 }, { "epoch": 1.55, "grad_norm": 7.631655693054199, "learning_rate": 1.0088637084287605e-06, "loss": 0.5525, "step": 10696 }, { "epoch": 1.55, "grad_norm": 7.350127220153809, "learning_rate": 1.0087054319559822e-06, "loss": 0.5518, "step": 10697 }, { "epoch": 1.55, "grad_norm": 5.961513519287109, "learning_rate": 1.0085471552651035e-06, "loss": 0.4829, "step": 10698 }, { "epoch": 1.55, "grad_norm": 6.755773544311523, "learning_rate": 1.0083888783600899e-06, "loss": 0.4877, "step": 10699 }, { "epoch": 1.55, "grad_norm": 6.314786911010742, "learning_rate": 1.0082306012449065e-06, "loss": 0.5388, "step": 10700 }, { "epoch": 1.55, "grad_norm": 5.840033531188965, "learning_rate": 1.0080723239235188e-06, "loss": 0.5042, "step": 10701 }, { "epoch": 1.55, "grad_norm": 6.490549564361572, "learning_rate": 1.007914046399892e-06, "loss": 0.5444, "step": 10702 }, { "epoch": 1.55, "grad_norm": 6.996322154998779, "learning_rate": 1.0077557686779914e-06, "loss": 0.5336, "step": 10703 }, { "epoch": 1.55, "grad_norm": 7.283633232116699, "learning_rate": 1.0075974907617828e-06, "loss": 0.6506, "step": 10704 }, { "epoch": 1.55, "grad_norm": 6.603724956512451, "learning_rate": 1.0074392126552315e-06, "loss": 0.49, "step": 10705 }, { "epoch": 1.55, "grad_norm": 7.115146160125732, "learning_rate": 1.0072809343623028e-06, "loss": 0.4965, "step": 10706 }, { "epoch": 1.55, "grad_norm": 7.41063117980957, "learning_rate": 1.007122655886962e-06, "loss": 0.5985, "step": 10707 }, { "epoch": 1.55, "grad_norm": 6.166238307952881, "learning_rate": 1.0069643772331752e-06, "loss": 0.5165, "step": 10708 }, { "epoch": 1.55, "grad_norm": 5.989428520202637, "learning_rate": 1.0068060984049065e-06, "loss": 0.5115, "step": 10709 }, { "epoch": 1.55, "grad_norm": 6.978009223937988, "learning_rate": 1.0066478194061224e-06, "loss": 0.6145, "step": 10710 }, { "epoch": 1.55, "grad_norm": 6.813746452331543, "learning_rate": 1.0064895402407877e-06, "loss": 0.5395, "step": 10711 }, { "epoch": 1.55, "grad_norm": 5.84977388381958, "learning_rate": 1.0063312609128685e-06, "loss": 0.4971, "step": 10712 }, { "epoch": 1.55, "grad_norm": 6.017190456390381, "learning_rate": 1.0061729814263294e-06, "loss": 0.489, "step": 10713 }, { "epoch": 1.55, "grad_norm": 7.9669718742370605, "learning_rate": 1.006014701785137e-06, "loss": 0.5823, "step": 10714 }, { "epoch": 1.55, "grad_norm": 5.802934646606445, "learning_rate": 1.0058564219932554e-06, "loss": 0.492, "step": 10715 }, { "epoch": 1.55, "grad_norm": 6.3123273849487305, "learning_rate": 1.0056981420546508e-06, "loss": 0.5505, "step": 10716 }, { "epoch": 1.55, "grad_norm": 6.425966739654541, "learning_rate": 1.0055398619732885e-06, "loss": 0.5219, "step": 10717 }, { "epoch": 1.56, "grad_norm": 6.179140567779541, "learning_rate": 1.0053815817531342e-06, "loss": 0.5089, "step": 10718 }, { "epoch": 1.56, "grad_norm": 7.500188827514648, "learning_rate": 1.0052233013981528e-06, "loss": 0.6327, "step": 10719 }, { "epoch": 1.56, "grad_norm": 6.64601469039917, "learning_rate": 1.0050650209123103e-06, "loss": 0.4966, "step": 10720 }, { "epoch": 1.56, "grad_norm": 6.443263530731201, "learning_rate": 1.0049067402995718e-06, "loss": 0.5258, "step": 10721 }, { "epoch": 1.56, "grad_norm": 6.814658164978027, "learning_rate": 1.0047484595639029e-06, "loss": 0.5416, "step": 10722 }, { "epoch": 1.56, "grad_norm": 5.944779396057129, "learning_rate": 1.0045901787092692e-06, "loss": 0.5121, "step": 10723 }, { "epoch": 1.56, "grad_norm": 6.980202674865723, "learning_rate": 1.0044318977396358e-06, "loss": 0.5403, "step": 10724 }, { "epoch": 1.56, "grad_norm": 6.65249490737915, "learning_rate": 1.0042736166589684e-06, "loss": 0.5487, "step": 10725 }, { "epoch": 1.56, "grad_norm": 6.409507751464844, "learning_rate": 1.0041153354712327e-06, "loss": 0.4771, "step": 10726 }, { "epoch": 1.56, "grad_norm": 6.888347148895264, "learning_rate": 1.003957054180394e-06, "loss": 0.5506, "step": 10727 }, { "epoch": 1.56, "grad_norm": 6.832520484924316, "learning_rate": 1.0037987727904175e-06, "loss": 0.5074, "step": 10728 }, { "epoch": 1.56, "grad_norm": 6.200701713562012, "learning_rate": 1.0036404913052692e-06, "loss": 0.5362, "step": 10729 }, { "epoch": 1.56, "grad_norm": 7.006568431854248, "learning_rate": 1.003482209728914e-06, "loss": 0.5555, "step": 10730 }, { "epoch": 1.56, "grad_norm": 6.954680442810059, "learning_rate": 1.0033239280653179e-06, "loss": 0.5135, "step": 10731 }, { "epoch": 1.56, "grad_norm": 6.074249744415283, "learning_rate": 1.003165646318446e-06, "loss": 0.5702, "step": 10732 }, { "epoch": 1.56, "grad_norm": 6.887825012207031, "learning_rate": 1.003007364492264e-06, "loss": 0.507, "step": 10733 }, { "epoch": 1.56, "grad_norm": 7.630953311920166, "learning_rate": 1.0028490825907374e-06, "loss": 0.5639, "step": 10734 }, { "epoch": 1.56, "grad_norm": 5.827160835266113, "learning_rate": 1.0026908006178318e-06, "loss": 0.5574, "step": 10735 }, { "epoch": 1.56, "grad_norm": 6.271949291229248, "learning_rate": 1.0025325185775127e-06, "loss": 0.5678, "step": 10736 }, { "epoch": 1.56, "grad_norm": 6.808998107910156, "learning_rate": 1.0023742364737452e-06, "loss": 0.5138, "step": 10737 }, { "epoch": 1.56, "grad_norm": 6.5077643394470215, "learning_rate": 1.0022159543104951e-06, "loss": 0.6407, "step": 10738 }, { "epoch": 1.56, "grad_norm": 6.807989597320557, "learning_rate": 1.0020576720917276e-06, "loss": 0.4928, "step": 10739 }, { "epoch": 1.56, "grad_norm": 6.468861103057861, "learning_rate": 1.0018993898214086e-06, "loss": 0.5845, "step": 10740 }, { "epoch": 1.56, "grad_norm": 5.992048740386963, "learning_rate": 1.001741107503504e-06, "loss": 0.5056, "step": 10741 }, { "epoch": 1.56, "grad_norm": 5.731698036193848, "learning_rate": 1.0015828251419783e-06, "loss": 0.5155, "step": 10742 }, { "epoch": 1.56, "grad_norm": 6.309797286987305, "learning_rate": 1.0014245427407974e-06, "loss": 0.5115, "step": 10743 }, { "epoch": 1.56, "grad_norm": 6.818399906158447, "learning_rate": 1.0012662603039271e-06, "loss": 0.572, "step": 10744 }, { "epoch": 1.56, "grad_norm": 7.285825252532959, "learning_rate": 1.0011079778353325e-06, "loss": 0.5319, "step": 10745 }, { "epoch": 1.56, "grad_norm": 6.119741916656494, "learning_rate": 1.0009496953389796e-06, "loss": 0.4875, "step": 10746 }, { "epoch": 1.56, "grad_norm": 6.151933193206787, "learning_rate": 1.0007914128188335e-06, "loss": 0.4989, "step": 10747 }, { "epoch": 1.56, "grad_norm": 6.301705837249756, "learning_rate": 1.0006331302788597e-06, "loss": 0.4923, "step": 10748 }, { "epoch": 1.56, "grad_norm": 6.187967300415039, "learning_rate": 1.0004748477230243e-06, "loss": 0.5508, "step": 10749 }, { "epoch": 1.56, "grad_norm": 7.727250576019287, "learning_rate": 1.000316565155292e-06, "loss": 0.5396, "step": 10750 }, { "epoch": 1.56, "grad_norm": 6.939038276672363, "learning_rate": 1.000158282579629e-06, "loss": 0.4716, "step": 10751 }, { "epoch": 1.56, "grad_norm": 7.040472984313965, "learning_rate": 1e-06, "loss": 0.559, "step": 10752 }, { "epoch": 1.56, "grad_norm": 7.034924030303955, "learning_rate": 9.99841717420371e-07, "loss": 0.5906, "step": 10753 }, { "epoch": 1.56, "grad_norm": 6.749056816101074, "learning_rate": 9.99683434844708e-07, "loss": 0.5678, "step": 10754 }, { "epoch": 1.56, "grad_norm": 6.451381206512451, "learning_rate": 9.995251522769756e-07, "loss": 0.5241, "step": 10755 }, { "epoch": 1.56, "grad_norm": 6.002399921417236, "learning_rate": 9.993668697211402e-07, "loss": 0.5467, "step": 10756 }, { "epoch": 1.56, "grad_norm": 6.504219055175781, "learning_rate": 9.992085871811664e-07, "loss": 0.4791, "step": 10757 }, { "epoch": 1.56, "grad_norm": 6.4160051345825195, "learning_rate": 9.990503046610203e-07, "loss": 0.5309, "step": 10758 }, { "epoch": 1.56, "grad_norm": 6.993788242340088, "learning_rate": 9.988920221646672e-07, "loss": 0.5508, "step": 10759 }, { "epoch": 1.56, "grad_norm": 5.6003875732421875, "learning_rate": 9.987337396960728e-07, "loss": 0.4923, "step": 10760 }, { "epoch": 1.56, "grad_norm": 7.04973030090332, "learning_rate": 9.985754572592023e-07, "loss": 0.5171, "step": 10761 }, { "epoch": 1.56, "grad_norm": 6.069398880004883, "learning_rate": 9.984171748580218e-07, "loss": 0.5242, "step": 10762 }, { "epoch": 1.56, "grad_norm": 6.067538738250732, "learning_rate": 9.982588924964962e-07, "loss": 0.5325, "step": 10763 }, { "epoch": 1.56, "grad_norm": 6.179460048675537, "learning_rate": 9.981006101785913e-07, "loss": 0.5615, "step": 10764 }, { "epoch": 1.56, "grad_norm": 6.983046054840088, "learning_rate": 9.979423279082723e-07, "loss": 0.5619, "step": 10765 }, { "epoch": 1.56, "grad_norm": 6.507807731628418, "learning_rate": 9.977840456895052e-07, "loss": 0.4914, "step": 10766 }, { "epoch": 1.56, "grad_norm": 7.237122535705566, "learning_rate": 9.976257635262547e-07, "loss": 0.5968, "step": 10767 }, { "epoch": 1.56, "grad_norm": 6.028097629547119, "learning_rate": 9.974674814224876e-07, "loss": 0.5657, "step": 10768 }, { "epoch": 1.56, "grad_norm": 7.479630947113037, "learning_rate": 9.973091993821683e-07, "loss": 0.5759, "step": 10769 }, { "epoch": 1.56, "grad_norm": 5.616400241851807, "learning_rate": 9.971509174092625e-07, "loss": 0.5206, "step": 10770 }, { "epoch": 1.56, "grad_norm": 7.070750713348389, "learning_rate": 9.96992635507736e-07, "loss": 0.5031, "step": 10771 }, { "epoch": 1.56, "grad_norm": 6.872129440307617, "learning_rate": 9.968343536815542e-07, "loss": 0.5925, "step": 10772 }, { "epoch": 1.56, "grad_norm": 6.873866081237793, "learning_rate": 9.966760719346823e-07, "loss": 0.5261, "step": 10773 }, { "epoch": 1.56, "grad_norm": 6.890963077545166, "learning_rate": 9.965177902710862e-07, "loss": 0.5222, "step": 10774 }, { "epoch": 1.56, "grad_norm": 7.359591484069824, "learning_rate": 9.963595086947309e-07, "loss": 0.5426, "step": 10775 }, { "epoch": 1.56, "grad_norm": 6.062594413757324, "learning_rate": 9.962012272095826e-07, "loss": 0.5157, "step": 10776 }, { "epoch": 1.56, "grad_norm": 6.921217441558838, "learning_rate": 9.960429458196061e-07, "loss": 0.6324, "step": 10777 }, { "epoch": 1.56, "grad_norm": 6.590737342834473, "learning_rate": 9.958846645287674e-07, "loss": 0.5174, "step": 10778 }, { "epoch": 1.56, "grad_norm": 6.526573181152344, "learning_rate": 9.957263833410315e-07, "loss": 0.6039, "step": 10779 }, { "epoch": 1.56, "grad_norm": 6.555172443389893, "learning_rate": 9.95568102260364e-07, "loss": 0.575, "step": 10780 }, { "epoch": 1.56, "grad_norm": 6.711130142211914, "learning_rate": 9.95409821290731e-07, "loss": 0.5447, "step": 10781 }, { "epoch": 1.56, "grad_norm": 7.627818584442139, "learning_rate": 9.952515404360968e-07, "loss": 0.503, "step": 10782 }, { "epoch": 1.56, "grad_norm": 6.940924644470215, "learning_rate": 9.95093259700428e-07, "loss": 0.6046, "step": 10783 }, { "epoch": 1.56, "grad_norm": 6.467230796813965, "learning_rate": 9.949349790876897e-07, "loss": 0.5588, "step": 10784 }, { "epoch": 1.56, "grad_norm": 6.23764181137085, "learning_rate": 9.947766986018474e-07, "loss": 0.5047, "step": 10785 }, { "epoch": 1.57, "grad_norm": 5.917710304260254, "learning_rate": 9.946184182468657e-07, "loss": 0.5169, "step": 10786 }, { "epoch": 1.57, "grad_norm": 7.165543556213379, "learning_rate": 9.944601380267114e-07, "loss": 0.5702, "step": 10787 }, { "epoch": 1.57, "grad_norm": 6.814971446990967, "learning_rate": 9.943018579453491e-07, "loss": 0.533, "step": 10788 }, { "epoch": 1.57, "grad_norm": 5.819704055786133, "learning_rate": 9.941435780067445e-07, "loss": 0.4638, "step": 10789 }, { "epoch": 1.57, "grad_norm": 6.38775634765625, "learning_rate": 9.939852982148632e-07, "loss": 0.5251, "step": 10790 }, { "epoch": 1.57, "grad_norm": 6.739017963409424, "learning_rate": 9.938270185736705e-07, "loss": 0.5159, "step": 10791 }, { "epoch": 1.57, "grad_norm": 6.381942272186279, "learning_rate": 9.936687390871316e-07, "loss": 0.5466, "step": 10792 }, { "epoch": 1.57, "grad_norm": 6.258204936981201, "learning_rate": 9.935104597592124e-07, "loss": 0.5993, "step": 10793 }, { "epoch": 1.57, "grad_norm": 8.010136604309082, "learning_rate": 9.933521805938777e-07, "loss": 0.6331, "step": 10794 }, { "epoch": 1.57, "grad_norm": 5.975630760192871, "learning_rate": 9.931939015950937e-07, "loss": 0.4827, "step": 10795 }, { "epoch": 1.57, "grad_norm": 6.544589519500732, "learning_rate": 9.93035622766825e-07, "loss": 0.5165, "step": 10796 }, { "epoch": 1.57, "grad_norm": 5.433374881744385, "learning_rate": 9.92877344113038e-07, "loss": 0.4735, "step": 10797 }, { "epoch": 1.57, "grad_norm": 6.353094577789307, "learning_rate": 9.927190656376973e-07, "loss": 0.5569, "step": 10798 }, { "epoch": 1.57, "grad_norm": 6.470876693725586, "learning_rate": 9.925607873447689e-07, "loss": 0.5708, "step": 10799 }, { "epoch": 1.57, "grad_norm": 6.239012718200684, "learning_rate": 9.924025092382171e-07, "loss": 0.5627, "step": 10800 }, { "epoch": 1.57, "grad_norm": 6.495420932769775, "learning_rate": 9.92244231322009e-07, "loss": 0.5386, "step": 10801 }, { "epoch": 1.57, "grad_norm": 6.236661434173584, "learning_rate": 9.920859536001082e-07, "loss": 0.5275, "step": 10802 }, { "epoch": 1.57, "grad_norm": 6.520249843597412, "learning_rate": 9.919276760764816e-07, "loss": 0.5989, "step": 10803 }, { "epoch": 1.57, "grad_norm": 6.081907272338867, "learning_rate": 9.917693987550936e-07, "loss": 0.4999, "step": 10804 }, { "epoch": 1.57, "grad_norm": 6.6241679191589355, "learning_rate": 9.9161112163991e-07, "loss": 0.5859, "step": 10805 }, { "epoch": 1.57, "grad_norm": 6.294736862182617, "learning_rate": 9.914528447348966e-07, "loss": 0.5638, "step": 10806 }, { "epoch": 1.57, "grad_norm": 6.996273040771484, "learning_rate": 9.912945680440175e-07, "loss": 0.5815, "step": 10807 }, { "epoch": 1.57, "grad_norm": 7.052328109741211, "learning_rate": 9.911362915712396e-07, "loss": 0.5391, "step": 10808 }, { "epoch": 1.57, "grad_norm": 6.053342342376709, "learning_rate": 9.90978015320527e-07, "loss": 0.5433, "step": 10809 }, { "epoch": 1.57, "grad_norm": 6.5750732421875, "learning_rate": 9.908197392958458e-07, "loss": 0.5848, "step": 10810 }, { "epoch": 1.57, "grad_norm": 6.880033016204834, "learning_rate": 9.90661463501161e-07, "loss": 0.6135, "step": 10811 }, { "epoch": 1.57, "grad_norm": 6.0342841148376465, "learning_rate": 9.905031879404383e-07, "loss": 0.5167, "step": 10812 }, { "epoch": 1.57, "grad_norm": 6.028415203094482, "learning_rate": 9.903449126176426e-07, "loss": 0.5124, "step": 10813 }, { "epoch": 1.57, "grad_norm": 6.839015007019043, "learning_rate": 9.901866375367396e-07, "loss": 0.5642, "step": 10814 }, { "epoch": 1.57, "grad_norm": 6.281211853027344, "learning_rate": 9.90028362701694e-07, "loss": 0.5388, "step": 10815 }, { "epoch": 1.57, "grad_norm": 6.845225811004639, "learning_rate": 9.89870088116472e-07, "loss": 0.5417, "step": 10816 }, { "epoch": 1.57, "grad_norm": 7.244946479797363, "learning_rate": 9.897118137850382e-07, "loss": 0.5964, "step": 10817 }, { "epoch": 1.57, "grad_norm": 5.800626277923584, "learning_rate": 9.895535397113585e-07, "loss": 0.5067, "step": 10818 }, { "epoch": 1.57, "grad_norm": 5.8665547370910645, "learning_rate": 9.893952658993977e-07, "loss": 0.5104, "step": 10819 }, { "epoch": 1.57, "grad_norm": 6.391442775726318, "learning_rate": 9.892369923531216e-07, "loss": 0.5241, "step": 10820 }, { "epoch": 1.57, "grad_norm": 7.222010612487793, "learning_rate": 9.890787190764945e-07, "loss": 0.4935, "step": 10821 }, { "epoch": 1.57, "grad_norm": 6.7611284255981445, "learning_rate": 9.88920446073483e-07, "loss": 0.5341, "step": 10822 }, { "epoch": 1.57, "grad_norm": 6.669150352478027, "learning_rate": 9.887621733480512e-07, "loss": 0.525, "step": 10823 }, { "epoch": 1.57, "grad_norm": 7.33245849609375, "learning_rate": 9.886039009041654e-07, "loss": 0.5768, "step": 10824 }, { "epoch": 1.57, "grad_norm": 5.819614410400391, "learning_rate": 9.8844562874579e-07, "loss": 0.5022, "step": 10825 }, { "epoch": 1.57, "grad_norm": 6.730140686035156, "learning_rate": 9.882873568768907e-07, "loss": 0.4979, "step": 10826 }, { "epoch": 1.57, "grad_norm": 6.828928470611572, "learning_rate": 9.881290853014326e-07, "loss": 0.5725, "step": 10827 }, { "epoch": 1.57, "grad_norm": 6.16523551940918, "learning_rate": 9.879708140233812e-07, "loss": 0.5098, "step": 10828 }, { "epoch": 1.57, "grad_norm": 7.756975173950195, "learning_rate": 9.878125430467011e-07, "loss": 0.6328, "step": 10829 }, { "epoch": 1.57, "grad_norm": 6.443450927734375, "learning_rate": 9.876542723753582e-07, "loss": 0.4635, "step": 10830 }, { "epoch": 1.57, "grad_norm": 6.222548007965088, "learning_rate": 9.874960020133172e-07, "loss": 0.4977, "step": 10831 }, { "epoch": 1.57, "grad_norm": 6.471304893493652, "learning_rate": 9.873377319645438e-07, "loss": 0.5604, "step": 10832 }, { "epoch": 1.57, "grad_norm": 6.57289457321167, "learning_rate": 9.871794622330032e-07, "loss": 0.5967, "step": 10833 }, { "epoch": 1.57, "grad_norm": 6.608226776123047, "learning_rate": 9.870211928226596e-07, "loss": 0.573, "step": 10834 }, { "epoch": 1.57, "grad_norm": 6.63275146484375, "learning_rate": 9.868629237374795e-07, "loss": 0.5796, "step": 10835 }, { "epoch": 1.57, "grad_norm": 6.06021785736084, "learning_rate": 9.867046549814272e-07, "loss": 0.5511, "step": 10836 }, { "epoch": 1.57, "grad_norm": 6.653798580169678, "learning_rate": 9.865463865584686e-07, "loss": 0.5927, "step": 10837 }, { "epoch": 1.57, "grad_norm": 7.479808330535889, "learning_rate": 9.86388118472568e-07, "loss": 0.6118, "step": 10838 }, { "epoch": 1.57, "grad_norm": 6.758098602294922, "learning_rate": 9.862298507276911e-07, "loss": 0.517, "step": 10839 }, { "epoch": 1.57, "grad_norm": 5.961016654968262, "learning_rate": 9.860715833278031e-07, "loss": 0.5461, "step": 10840 }, { "epoch": 1.57, "grad_norm": 6.355329513549805, "learning_rate": 9.85913316276869e-07, "loss": 0.5272, "step": 10841 }, { "epoch": 1.57, "grad_norm": 6.84583044052124, "learning_rate": 9.857550495788533e-07, "loss": 0.5138, "step": 10842 }, { "epoch": 1.57, "grad_norm": 6.353479385375977, "learning_rate": 9.855967832377221e-07, "loss": 0.5597, "step": 10843 }, { "epoch": 1.57, "grad_norm": 6.779086112976074, "learning_rate": 9.8543851725744e-07, "loss": 0.5915, "step": 10844 }, { "epoch": 1.57, "grad_norm": 6.893314838409424, "learning_rate": 9.852802516419724e-07, "loss": 0.547, "step": 10845 }, { "epoch": 1.57, "grad_norm": 6.565151214599609, "learning_rate": 9.85121986395284e-07, "loss": 0.5872, "step": 10846 }, { "epoch": 1.57, "grad_norm": 6.322213649749756, "learning_rate": 9.849637215213402e-07, "loss": 0.5231, "step": 10847 }, { "epoch": 1.57, "grad_norm": 7.795380592346191, "learning_rate": 9.848054570241056e-07, "loss": 0.6197, "step": 10848 }, { "epoch": 1.57, "grad_norm": 6.9608025550842285, "learning_rate": 9.84647192907546e-07, "loss": 0.5437, "step": 10849 }, { "epoch": 1.57, "grad_norm": 7.523287296295166, "learning_rate": 9.844889291756255e-07, "loss": 0.6242, "step": 10850 }, { "epoch": 1.57, "grad_norm": 7.204124927520752, "learning_rate": 9.843306658323102e-07, "loss": 0.5244, "step": 10851 }, { "epoch": 1.57, "grad_norm": 5.73392915725708, "learning_rate": 9.841724028815642e-07, "loss": 0.5258, "step": 10852 }, { "epoch": 1.57, "grad_norm": 7.180935859680176, "learning_rate": 9.840141403273533e-07, "loss": 0.6031, "step": 10853 }, { "epoch": 1.57, "grad_norm": 6.638278007507324, "learning_rate": 9.83855878173642e-07, "loss": 0.5453, "step": 10854 }, { "epoch": 1.58, "grad_norm": 5.70119047164917, "learning_rate": 9.836976164243957e-07, "loss": 0.4759, "step": 10855 }, { "epoch": 1.58, "grad_norm": 5.920627117156982, "learning_rate": 9.835393550835786e-07, "loss": 0.5518, "step": 10856 }, { "epoch": 1.58, "grad_norm": 5.6929779052734375, "learning_rate": 9.833810941551561e-07, "loss": 0.5, "step": 10857 }, { "epoch": 1.58, "grad_norm": 6.1527099609375, "learning_rate": 9.83222833643094e-07, "loss": 0.5267, "step": 10858 }, { "epoch": 1.58, "grad_norm": 6.599904537200928, "learning_rate": 9.83064573551356e-07, "loss": 0.5806, "step": 10859 }, { "epoch": 1.58, "grad_norm": 5.542954444885254, "learning_rate": 9.82906313883908e-07, "loss": 0.5336, "step": 10860 }, { "epoch": 1.58, "grad_norm": 6.380799770355225, "learning_rate": 9.82748054644714e-07, "loss": 0.5137, "step": 10861 }, { "epoch": 1.58, "grad_norm": 6.063822269439697, "learning_rate": 9.8258979583774e-07, "loss": 0.5209, "step": 10862 }, { "epoch": 1.58, "grad_norm": 6.146076202392578, "learning_rate": 9.8243153746695e-07, "loss": 0.5511, "step": 10863 }, { "epoch": 1.58, "grad_norm": 6.949321746826172, "learning_rate": 9.822732795363096e-07, "loss": 0.564, "step": 10864 }, { "epoch": 1.58, "grad_norm": 5.823614120483398, "learning_rate": 9.821150220497828e-07, "loss": 0.4923, "step": 10865 }, { "epoch": 1.58, "grad_norm": 6.825497150421143, "learning_rate": 9.81956765011336e-07, "loss": 0.5447, "step": 10866 }, { "epoch": 1.58, "grad_norm": 7.487110614776611, "learning_rate": 9.817985084249325e-07, "loss": 0.5398, "step": 10867 }, { "epoch": 1.58, "grad_norm": 6.193394660949707, "learning_rate": 9.816402522945379e-07, "loss": 0.577, "step": 10868 }, { "epoch": 1.58, "grad_norm": 6.369415760040283, "learning_rate": 9.814819966241167e-07, "loss": 0.542, "step": 10869 }, { "epoch": 1.58, "grad_norm": 7.420207500457764, "learning_rate": 9.813237414176345e-07, "loss": 0.5752, "step": 10870 }, { "epoch": 1.58, "grad_norm": 6.7649030685424805, "learning_rate": 9.81165486679055e-07, "loss": 0.5788, "step": 10871 }, { "epoch": 1.58, "grad_norm": 6.030555248260498, "learning_rate": 9.810072324123443e-07, "loss": 0.5137, "step": 10872 }, { "epoch": 1.58, "grad_norm": 6.34384822845459, "learning_rate": 9.80848978621466e-07, "loss": 0.5058, "step": 10873 }, { "epoch": 1.58, "grad_norm": 6.942547798156738, "learning_rate": 9.806907253103858e-07, "loss": 0.5187, "step": 10874 }, { "epoch": 1.58, "grad_norm": 6.551295280456543, "learning_rate": 9.80532472483068e-07, "loss": 0.4521, "step": 10875 }, { "epoch": 1.58, "grad_norm": 6.273194313049316, "learning_rate": 9.803742201434775e-07, "loss": 0.5169, "step": 10876 }, { "epoch": 1.58, "grad_norm": 6.456666946411133, "learning_rate": 9.802159682955786e-07, "loss": 0.5691, "step": 10877 }, { "epoch": 1.58, "grad_norm": 6.888660907745361, "learning_rate": 9.80057716943337e-07, "loss": 0.5375, "step": 10878 }, { "epoch": 1.58, "grad_norm": 6.1399760246276855, "learning_rate": 9.798994660907165e-07, "loss": 0.478, "step": 10879 }, { "epoch": 1.58, "grad_norm": 6.897381782531738, "learning_rate": 9.797412157416826e-07, "loss": 0.6132, "step": 10880 }, { "epoch": 1.58, "grad_norm": 6.6391801834106445, "learning_rate": 9.795829659001993e-07, "loss": 0.4655, "step": 10881 }, { "epoch": 1.58, "grad_norm": 6.168660640716553, "learning_rate": 9.794247165702319e-07, "loss": 0.5269, "step": 10882 }, { "epoch": 1.58, "grad_norm": 7.833740234375, "learning_rate": 9.792664677557447e-07, "loss": 0.4995, "step": 10883 }, { "epoch": 1.58, "grad_norm": 7.081344127655029, "learning_rate": 9.79108219460702e-07, "loss": 0.5405, "step": 10884 }, { "epoch": 1.58, "grad_norm": 7.302643775939941, "learning_rate": 9.789499716890695e-07, "loss": 0.5823, "step": 10885 }, { "epoch": 1.58, "grad_norm": 6.238552093505859, "learning_rate": 9.78791724444811e-07, "loss": 0.5142, "step": 10886 }, { "epoch": 1.58, "grad_norm": 7.165762424468994, "learning_rate": 9.786334777318916e-07, "loss": 0.6088, "step": 10887 }, { "epoch": 1.58, "grad_norm": 6.266927242279053, "learning_rate": 9.784752315542756e-07, "loss": 0.5094, "step": 10888 }, { "epoch": 1.58, "grad_norm": 6.858401298522949, "learning_rate": 9.783169859159278e-07, "loss": 0.5032, "step": 10889 }, { "epoch": 1.58, "grad_norm": 6.471364974975586, "learning_rate": 9.781587408208122e-07, "loss": 0.5377, "step": 10890 }, { "epoch": 1.58, "grad_norm": 7.045785903930664, "learning_rate": 9.780004962728944e-07, "loss": 0.4814, "step": 10891 }, { "epoch": 1.58, "grad_norm": 6.8382158279418945, "learning_rate": 9.778422522761382e-07, "loss": 0.503, "step": 10892 }, { "epoch": 1.58, "grad_norm": 6.098807334899902, "learning_rate": 9.776840088345085e-07, "loss": 0.4619, "step": 10893 }, { "epoch": 1.58, "grad_norm": 6.740459442138672, "learning_rate": 9.775257659519695e-07, "loss": 0.4571, "step": 10894 }, { "epoch": 1.58, "grad_norm": 7.01633358001709, "learning_rate": 9.77367523632486e-07, "loss": 0.582, "step": 10895 }, { "epoch": 1.58, "grad_norm": 6.8705573081970215, "learning_rate": 9.772092818800226e-07, "loss": 0.4907, "step": 10896 }, { "epoch": 1.58, "grad_norm": 6.541099548339844, "learning_rate": 9.770510406985434e-07, "loss": 0.5965, "step": 10897 }, { "epoch": 1.58, "grad_norm": 6.519861698150635, "learning_rate": 9.76892800092013e-07, "loss": 0.4483, "step": 10898 }, { "epoch": 1.58, "grad_norm": 5.7698235511779785, "learning_rate": 9.767345600643962e-07, "loss": 0.4948, "step": 10899 }, { "epoch": 1.58, "grad_norm": 6.48932409286499, "learning_rate": 9.765763206196569e-07, "loss": 0.5318, "step": 10900 }, { "epoch": 1.58, "grad_norm": 6.384915351867676, "learning_rate": 9.764180817617602e-07, "loss": 0.5183, "step": 10901 }, { "epoch": 1.58, "grad_norm": 6.797413349151611, "learning_rate": 9.7625984349467e-07, "loss": 0.554, "step": 10902 }, { "epoch": 1.58, "grad_norm": 6.560110092163086, "learning_rate": 9.76101605822351e-07, "loss": 0.557, "step": 10903 }, { "epoch": 1.58, "grad_norm": 7.78077507019043, "learning_rate": 9.75943368748767e-07, "loss": 0.4946, "step": 10904 }, { "epoch": 1.58, "grad_norm": 6.927000999450684, "learning_rate": 9.757851322778832e-07, "loss": 0.5765, "step": 10905 }, { "epoch": 1.58, "grad_norm": 6.431811332702637, "learning_rate": 9.756268964136631e-07, "loss": 0.5699, "step": 10906 }, { "epoch": 1.58, "grad_norm": 6.381078720092773, "learning_rate": 9.75468661160072e-07, "loss": 0.5135, "step": 10907 }, { "epoch": 1.58, "grad_norm": 7.28284215927124, "learning_rate": 9.753104265210734e-07, "loss": 0.5663, "step": 10908 }, { "epoch": 1.58, "grad_norm": 6.192629337310791, "learning_rate": 9.75152192500632e-07, "loss": 0.5347, "step": 10909 }, { "epoch": 1.58, "grad_norm": 6.039917945861816, "learning_rate": 9.749939591027125e-07, "loss": 0.5469, "step": 10910 }, { "epoch": 1.58, "grad_norm": 6.47160005569458, "learning_rate": 9.74835726331278e-07, "loss": 0.5606, "step": 10911 }, { "epoch": 1.58, "grad_norm": 5.925068378448486, "learning_rate": 9.74677494190294e-07, "loss": 0.5484, "step": 10912 }, { "epoch": 1.58, "grad_norm": 6.472837924957275, "learning_rate": 9.745192626837239e-07, "loss": 0.5842, "step": 10913 }, { "epoch": 1.58, "grad_norm": 7.008252143859863, "learning_rate": 9.743610318155326e-07, "loss": 0.5889, "step": 10914 }, { "epoch": 1.58, "grad_norm": 6.597995758056641, "learning_rate": 9.74202801589684e-07, "loss": 0.4637, "step": 10915 }, { "epoch": 1.58, "grad_norm": 6.778687953948975, "learning_rate": 9.740445720101422e-07, "loss": 0.5293, "step": 10916 }, { "epoch": 1.58, "grad_norm": 6.102165699005127, "learning_rate": 9.738863430808713e-07, "loss": 0.4577, "step": 10917 }, { "epoch": 1.58, "grad_norm": 6.175479412078857, "learning_rate": 9.737281148058358e-07, "loss": 0.5133, "step": 10918 }, { "epoch": 1.58, "grad_norm": 7.285245418548584, "learning_rate": 9.735698871889995e-07, "loss": 0.5885, "step": 10919 }, { "epoch": 1.58, "grad_norm": 7.0751776695251465, "learning_rate": 9.73411660234327e-07, "loss": 0.4623, "step": 10920 }, { "epoch": 1.58, "grad_norm": 6.24822473526001, "learning_rate": 9.732534339457818e-07, "loss": 0.5375, "step": 10921 }, { "epoch": 1.58, "grad_norm": 5.806020736694336, "learning_rate": 9.73095208327329e-07, "loss": 0.4827, "step": 10922 }, { "epoch": 1.58, "grad_norm": 7.000607967376709, "learning_rate": 9.729369833829316e-07, "loss": 0.5547, "step": 10923 }, { "epoch": 1.59, "grad_norm": 6.36608362197876, "learning_rate": 9.727787591165542e-07, "loss": 0.5116, "step": 10924 }, { "epoch": 1.59, "grad_norm": 6.946708679199219, "learning_rate": 9.726205355321606e-07, "loss": 0.5768, "step": 10925 }, { "epoch": 1.59, "grad_norm": 6.288668155670166, "learning_rate": 9.724623126337152e-07, "loss": 0.4996, "step": 10926 }, { "epoch": 1.59, "grad_norm": 6.768301963806152, "learning_rate": 9.723040904251815e-07, "loss": 0.5161, "step": 10927 }, { "epoch": 1.59, "grad_norm": 6.41859769821167, "learning_rate": 9.721458689105241e-07, "loss": 0.4414, "step": 10928 }, { "epoch": 1.59, "grad_norm": 6.335123538970947, "learning_rate": 9.719876480937067e-07, "loss": 0.4506, "step": 10929 }, { "epoch": 1.59, "grad_norm": 6.766639709472656, "learning_rate": 9.71829427978693e-07, "loss": 0.5414, "step": 10930 }, { "epoch": 1.59, "grad_norm": 6.446244716644287, "learning_rate": 9.716712085694474e-07, "loss": 0.4947, "step": 10931 }, { "epoch": 1.59, "grad_norm": 7.75219202041626, "learning_rate": 9.71512989869934e-07, "loss": 0.5198, "step": 10932 }, { "epoch": 1.59, "grad_norm": 7.026568412780762, "learning_rate": 9.713547718841157e-07, "loss": 0.5385, "step": 10933 }, { "epoch": 1.59, "grad_norm": 6.604563236236572, "learning_rate": 9.711965546159576e-07, "loss": 0.534, "step": 10934 }, { "epoch": 1.59, "grad_norm": 6.1148881912231445, "learning_rate": 9.710383380694224e-07, "loss": 0.4824, "step": 10935 }, { "epoch": 1.59, "grad_norm": 6.2474751472473145, "learning_rate": 9.70880122248475e-07, "loss": 0.486, "step": 10936 }, { "epoch": 1.59, "grad_norm": 7.2764387130737305, "learning_rate": 9.70721907157079e-07, "loss": 0.5396, "step": 10937 }, { "epoch": 1.59, "grad_norm": 6.744892120361328, "learning_rate": 9.705636927991975e-07, "loss": 0.5288, "step": 10938 }, { "epoch": 1.59, "grad_norm": 5.666220188140869, "learning_rate": 9.704054791787952e-07, "loss": 0.5036, "step": 10939 }, { "epoch": 1.59, "grad_norm": 7.299661636352539, "learning_rate": 9.702472662998352e-07, "loss": 0.5029, "step": 10940 }, { "epoch": 1.59, "grad_norm": 6.00515604019165, "learning_rate": 9.700890541662822e-07, "loss": 0.5122, "step": 10941 }, { "epoch": 1.59, "grad_norm": 6.890753269195557, "learning_rate": 9.699308427820989e-07, "loss": 0.5644, "step": 10942 }, { "epoch": 1.59, "grad_norm": 6.615618705749512, "learning_rate": 9.697726321512496e-07, "loss": 0.5299, "step": 10943 }, { "epoch": 1.59, "grad_norm": 6.062300682067871, "learning_rate": 9.696144222776979e-07, "loss": 0.5267, "step": 10944 }, { "epoch": 1.59, "grad_norm": 5.95261812210083, "learning_rate": 9.694562131654078e-07, "loss": 0.4966, "step": 10945 }, { "epoch": 1.59, "grad_norm": 6.282312870025635, "learning_rate": 9.69298004818342e-07, "loss": 0.4996, "step": 10946 }, { "epoch": 1.59, "grad_norm": 6.104601860046387, "learning_rate": 9.691397972404653e-07, "loss": 0.5669, "step": 10947 }, { "epoch": 1.59, "grad_norm": 6.122970104217529, "learning_rate": 9.689815904357405e-07, "loss": 0.5605, "step": 10948 }, { "epoch": 1.59, "grad_norm": 5.930974006652832, "learning_rate": 9.688233844081319e-07, "loss": 0.5549, "step": 10949 }, { "epoch": 1.59, "grad_norm": 6.395487308502197, "learning_rate": 9.686651791616026e-07, "loss": 0.525, "step": 10950 }, { "epoch": 1.59, "grad_norm": 7.07739782333374, "learning_rate": 9.685069747001164e-07, "loss": 0.5234, "step": 10951 }, { "epoch": 1.59, "grad_norm": 6.0694146156311035, "learning_rate": 9.683487710276365e-07, "loss": 0.5227, "step": 10952 }, { "epoch": 1.59, "grad_norm": 6.725245952606201, "learning_rate": 9.68190568148127e-07, "loss": 0.5262, "step": 10953 }, { "epoch": 1.59, "grad_norm": 5.884990215301514, "learning_rate": 9.680323660655508e-07, "loss": 0.4816, "step": 10954 }, { "epoch": 1.59, "grad_norm": 6.230543613433838, "learning_rate": 9.678741647838722e-07, "loss": 0.5363, "step": 10955 }, { "epoch": 1.59, "grad_norm": 5.83229398727417, "learning_rate": 9.677159643070536e-07, "loss": 0.448, "step": 10956 }, { "epoch": 1.59, "grad_norm": 6.99551248550415, "learning_rate": 9.675577646390596e-07, "loss": 0.5782, "step": 10957 }, { "epoch": 1.59, "grad_norm": 6.102356910705566, "learning_rate": 9.67399565783853e-07, "loss": 0.4593, "step": 10958 }, { "epoch": 1.59, "grad_norm": 6.308566570281982, "learning_rate": 9.672413677453973e-07, "loss": 0.4541, "step": 10959 }, { "epoch": 1.59, "grad_norm": 6.407201290130615, "learning_rate": 9.670831705276557e-07, "loss": 0.4691, "step": 10960 }, { "epoch": 1.59, "grad_norm": 6.4079790115356445, "learning_rate": 9.669249741345916e-07, "loss": 0.4865, "step": 10961 }, { "epoch": 1.59, "grad_norm": 6.809704303741455, "learning_rate": 9.66766778570169e-07, "loss": 0.6248, "step": 10962 }, { "epoch": 1.59, "grad_norm": 6.091346740722656, "learning_rate": 9.666085838383505e-07, "loss": 0.5011, "step": 10963 }, { "epoch": 1.59, "grad_norm": 6.3136491775512695, "learning_rate": 9.664503899430997e-07, "loss": 0.5104, "step": 10964 }, { "epoch": 1.59, "grad_norm": 6.2046589851379395, "learning_rate": 9.662921968883798e-07, "loss": 0.4758, "step": 10965 }, { "epoch": 1.59, "grad_norm": 6.109251976013184, "learning_rate": 9.661340046781545e-07, "loss": 0.509, "step": 10966 }, { "epoch": 1.59, "grad_norm": 6.00793981552124, "learning_rate": 9.659758133163863e-07, "loss": 0.5182, "step": 10967 }, { "epoch": 1.59, "grad_norm": 6.946233749389648, "learning_rate": 9.658176228070391e-07, "loss": 0.5406, "step": 10968 }, { "epoch": 1.59, "grad_norm": 6.979294776916504, "learning_rate": 9.656594331540755e-07, "loss": 0.5178, "step": 10969 }, { "epoch": 1.59, "grad_norm": 6.379594326019287, "learning_rate": 9.655012443614594e-07, "loss": 0.5893, "step": 10970 }, { "epoch": 1.59, "grad_norm": 6.283186912536621, "learning_rate": 9.653430564331532e-07, "loss": 0.4897, "step": 10971 }, { "epoch": 1.59, "grad_norm": 6.2401204109191895, "learning_rate": 9.65184869373121e-07, "loss": 0.5269, "step": 10972 }, { "epoch": 1.59, "grad_norm": 6.190273284912109, "learning_rate": 9.650266831853244e-07, "loss": 0.512, "step": 10973 }, { "epoch": 1.59, "grad_norm": 6.44706392288208, "learning_rate": 9.648684978737283e-07, "loss": 0.5197, "step": 10974 }, { "epoch": 1.59, "grad_norm": 6.0740485191345215, "learning_rate": 9.647103134422942e-07, "loss": 0.5396, "step": 10975 }, { "epoch": 1.59, "grad_norm": 6.433392524719238, "learning_rate": 9.645521298949864e-07, "loss": 0.5425, "step": 10976 }, { "epoch": 1.59, "grad_norm": 6.048892021179199, "learning_rate": 9.64393947235767e-07, "loss": 0.57, "step": 10977 }, { "epoch": 1.59, "grad_norm": 6.257585525512695, "learning_rate": 9.642357654685996e-07, "loss": 0.5572, "step": 10978 }, { "epoch": 1.59, "grad_norm": 6.611635208129883, "learning_rate": 9.64077584597447e-07, "loss": 0.5743, "step": 10979 }, { "epoch": 1.59, "grad_norm": 6.57534122467041, "learning_rate": 9.639194046262723e-07, "loss": 0.5099, "step": 10980 }, { "epoch": 1.59, "grad_norm": 6.076777458190918, "learning_rate": 9.637612255590378e-07, "loss": 0.5302, "step": 10981 }, { "epoch": 1.59, "grad_norm": 6.743631362915039, "learning_rate": 9.636030473997073e-07, "loss": 0.5472, "step": 10982 }, { "epoch": 1.59, "grad_norm": 7.369006156921387, "learning_rate": 9.634448701522433e-07, "loss": 0.4677, "step": 10983 }, { "epoch": 1.59, "grad_norm": 6.913108825683594, "learning_rate": 9.632866938206088e-07, "loss": 0.512, "step": 10984 }, { "epoch": 1.59, "grad_norm": 6.629275321960449, "learning_rate": 9.631285184087663e-07, "loss": 0.5705, "step": 10985 }, { "epoch": 1.59, "grad_norm": 6.034577369689941, "learning_rate": 9.629703439206787e-07, "loss": 0.5351, "step": 10986 }, { "epoch": 1.59, "grad_norm": 6.899438381195068, "learning_rate": 9.628121703603092e-07, "loss": 0.5932, "step": 10987 }, { "epoch": 1.59, "grad_norm": 5.98927640914917, "learning_rate": 9.6265399773162e-07, "loss": 0.5339, "step": 10988 }, { "epoch": 1.59, "grad_norm": 7.286844253540039, "learning_rate": 9.624958260385747e-07, "loss": 0.5876, "step": 10989 }, { "epoch": 1.59, "grad_norm": 7.0029706954956055, "learning_rate": 9.623376552851352e-07, "loss": 0.5888, "step": 10990 }, { "epoch": 1.59, "grad_norm": 6.322272777557373, "learning_rate": 9.621794854752648e-07, "loss": 0.6162, "step": 10991 }, { "epoch": 1.59, "grad_norm": 6.4144816398620605, "learning_rate": 9.620213166129258e-07, "loss": 0.4826, "step": 10992 }, { "epoch": 1.6, "grad_norm": 6.1521406173706055, "learning_rate": 9.618631487020813e-07, "loss": 0.4579, "step": 10993 }, { "epoch": 1.6, "grad_norm": 5.904198169708252, "learning_rate": 9.617049817466932e-07, "loss": 0.4724, "step": 10994 }, { "epoch": 1.6, "grad_norm": 7.221553325653076, "learning_rate": 9.61546815750725e-07, "loss": 0.5672, "step": 10995 }, { "epoch": 1.6, "grad_norm": 7.120107650756836, "learning_rate": 9.613886507181382e-07, "loss": 0.5347, "step": 10996 }, { "epoch": 1.6, "grad_norm": 6.6074113845825195, "learning_rate": 9.612304866528966e-07, "loss": 0.5431, "step": 10997 }, { "epoch": 1.6, "grad_norm": 7.140235424041748, "learning_rate": 9.61072323558962e-07, "loss": 0.6018, "step": 10998 }, { "epoch": 1.6, "grad_norm": 6.824549674987793, "learning_rate": 9.60914161440297e-07, "loss": 0.5573, "step": 10999 }, { "epoch": 1.6, "grad_norm": 6.748468399047852, "learning_rate": 9.607560003008644e-07, "loss": 0.5225, "step": 11000 }, { "epoch": 1.6, "grad_norm": 6.937241077423096, "learning_rate": 9.605978401446264e-07, "loss": 0.6558, "step": 11001 }, { "epoch": 1.6, "grad_norm": 6.726737976074219, "learning_rate": 9.60439680975545e-07, "loss": 0.5378, "step": 11002 }, { "epoch": 1.6, "grad_norm": 6.556742191314697, "learning_rate": 9.602815227975838e-07, "loss": 0.5466, "step": 11003 }, { "epoch": 1.6, "grad_norm": 5.8503193855285645, "learning_rate": 9.60123365614704e-07, "loss": 0.4846, "step": 11004 }, { "epoch": 1.6, "grad_norm": 6.425524711608887, "learning_rate": 9.59965209430869e-07, "loss": 0.4856, "step": 11005 }, { "epoch": 1.6, "grad_norm": 6.472404479980469, "learning_rate": 9.598070542500402e-07, "loss": 0.5244, "step": 11006 }, { "epoch": 1.6, "grad_norm": 6.747954845428467, "learning_rate": 9.596489000761807e-07, "loss": 0.4867, "step": 11007 }, { "epoch": 1.6, "grad_norm": 6.423892974853516, "learning_rate": 9.594907469132518e-07, "loss": 0.5324, "step": 11008 }, { "epoch": 1.6, "grad_norm": 7.0990309715271, "learning_rate": 9.593325947652171e-07, "loss": 0.4832, "step": 11009 }, { "epoch": 1.6, "grad_norm": 7.168073654174805, "learning_rate": 9.591744436360377e-07, "loss": 0.4856, "step": 11010 }, { "epoch": 1.6, "grad_norm": 5.698000907897949, "learning_rate": 9.590162935296768e-07, "loss": 0.444, "step": 11011 }, { "epoch": 1.6, "grad_norm": 7.236581325531006, "learning_rate": 9.588581444500954e-07, "loss": 0.4992, "step": 11012 }, { "epoch": 1.6, "grad_norm": 6.1676435470581055, "learning_rate": 9.586999964012569e-07, "loss": 0.5257, "step": 11013 }, { "epoch": 1.6, "grad_norm": 7.474936485290527, "learning_rate": 9.585418493871227e-07, "loss": 0.5251, "step": 11014 }, { "epoch": 1.6, "grad_norm": 5.936534881591797, "learning_rate": 9.583837034116547e-07, "loss": 0.5354, "step": 11015 }, { "epoch": 1.6, "grad_norm": 6.103018283843994, "learning_rate": 9.58225558478816e-07, "loss": 0.4699, "step": 11016 }, { "epoch": 1.6, "grad_norm": 7.017008304595947, "learning_rate": 9.580674145925677e-07, "loss": 0.5681, "step": 11017 }, { "epoch": 1.6, "grad_norm": 7.178301811218262, "learning_rate": 9.579092717568725e-07, "loss": 0.5518, "step": 11018 }, { "epoch": 1.6, "grad_norm": 6.6396002769470215, "learning_rate": 9.577511299756918e-07, "loss": 0.5429, "step": 11019 }, { "epoch": 1.6, "grad_norm": 6.433734893798828, "learning_rate": 9.575929892529882e-07, "loss": 0.4508, "step": 11020 }, { "epoch": 1.6, "grad_norm": 6.583924293518066, "learning_rate": 9.57434849592723e-07, "loss": 0.5173, "step": 11021 }, { "epoch": 1.6, "grad_norm": 6.809114456176758, "learning_rate": 9.572767109988587e-07, "loss": 0.5912, "step": 11022 }, { "epoch": 1.6, "grad_norm": 6.88601541519165, "learning_rate": 9.571185734753568e-07, "loss": 0.5325, "step": 11023 }, { "epoch": 1.6, "grad_norm": 6.69570779800415, "learning_rate": 9.569604370261797e-07, "loss": 0.5671, "step": 11024 }, { "epoch": 1.6, "grad_norm": 5.925332546234131, "learning_rate": 9.568023016552885e-07, "loss": 0.5316, "step": 11025 }, { "epoch": 1.6, "grad_norm": 6.423453330993652, "learning_rate": 9.566441673666457e-07, "loss": 0.493, "step": 11026 }, { "epoch": 1.6, "grad_norm": 7.025855541229248, "learning_rate": 9.564860341642128e-07, "loss": 0.4908, "step": 11027 }, { "epoch": 1.6, "grad_norm": 6.163492202758789, "learning_rate": 9.56327902051952e-07, "loss": 0.578, "step": 11028 }, { "epoch": 1.6, "grad_norm": 7.656181812286377, "learning_rate": 9.56169771033824e-07, "loss": 0.5747, "step": 11029 }, { "epoch": 1.6, "grad_norm": 5.749497890472412, "learning_rate": 9.560116411137915e-07, "loss": 0.4442, "step": 11030 }, { "epoch": 1.6, "grad_norm": 6.851305961608887, "learning_rate": 9.558535122958156e-07, "loss": 0.5651, "step": 11031 }, { "epoch": 1.6, "grad_norm": 6.656444072723389, "learning_rate": 9.556953845838585e-07, "loss": 0.5456, "step": 11032 }, { "epoch": 1.6, "grad_norm": 7.101266860961914, "learning_rate": 9.555372579818814e-07, "loss": 0.5583, "step": 11033 }, { "epoch": 1.6, "grad_norm": 6.5468668937683105, "learning_rate": 9.553791324938464e-07, "loss": 0.5371, "step": 11034 }, { "epoch": 1.6, "grad_norm": 7.5664801597595215, "learning_rate": 9.552210081237144e-07, "loss": 0.642, "step": 11035 }, { "epoch": 1.6, "grad_norm": 7.226005554199219, "learning_rate": 9.550628848754473e-07, "loss": 0.5644, "step": 11036 }, { "epoch": 1.6, "grad_norm": 6.018839359283447, "learning_rate": 9.549047627530066e-07, "loss": 0.5531, "step": 11037 }, { "epoch": 1.6, "grad_norm": 6.146134853363037, "learning_rate": 9.54746641760354e-07, "loss": 0.5065, "step": 11038 }, { "epoch": 1.6, "grad_norm": 6.848090648651123, "learning_rate": 9.545885219014503e-07, "loss": 0.5699, "step": 11039 }, { "epoch": 1.6, "grad_norm": 8.04382038116455, "learning_rate": 9.544304031802577e-07, "loss": 0.5508, "step": 11040 }, { "epoch": 1.6, "grad_norm": 6.488950729370117, "learning_rate": 9.542722856007374e-07, "loss": 0.5404, "step": 11041 }, { "epoch": 1.6, "grad_norm": 7.0475335121154785, "learning_rate": 9.541141691668504e-07, "loss": 0.5627, "step": 11042 }, { "epoch": 1.6, "grad_norm": 5.9456257820129395, "learning_rate": 9.539560538825588e-07, "loss": 0.5257, "step": 11043 }, { "epoch": 1.6, "grad_norm": 6.632178783416748, "learning_rate": 9.53797939751823e-07, "loss": 0.5399, "step": 11044 }, { "epoch": 1.6, "grad_norm": 5.869229793548584, "learning_rate": 9.536398267786051e-07, "loss": 0.5264, "step": 11045 }, { "epoch": 1.6, "grad_norm": 7.094176292419434, "learning_rate": 9.534817149668656e-07, "loss": 0.4809, "step": 11046 }, { "epoch": 1.6, "grad_norm": 6.766459941864014, "learning_rate": 9.533236043205666e-07, "loss": 0.5609, "step": 11047 }, { "epoch": 1.6, "grad_norm": 6.401455879211426, "learning_rate": 9.531654948436685e-07, "loss": 0.5176, "step": 11048 }, { "epoch": 1.6, "grad_norm": 7.16441535949707, "learning_rate": 9.530073865401332e-07, "loss": 0.5316, "step": 11049 }, { "epoch": 1.6, "grad_norm": 6.5703816413879395, "learning_rate": 9.52849279413921e-07, "loss": 0.4606, "step": 11050 }, { "epoch": 1.6, "grad_norm": 5.8646368980407715, "learning_rate": 9.526911734689939e-07, "loss": 0.4847, "step": 11051 }, { "epoch": 1.6, "grad_norm": 7.282791614532471, "learning_rate": 9.525330687093123e-07, "loss": 0.5243, "step": 11052 }, { "epoch": 1.6, "grad_norm": 7.015524864196777, "learning_rate": 9.523749651388378e-07, "loss": 0.5594, "step": 11053 }, { "epoch": 1.6, "grad_norm": 6.720893383026123, "learning_rate": 9.52216862761531e-07, "loss": 0.5026, "step": 11054 }, { "epoch": 1.6, "grad_norm": 6.879849910736084, "learning_rate": 9.520587615813531e-07, "loss": 0.5103, "step": 11055 }, { "epoch": 1.6, "grad_norm": 6.865689277648926, "learning_rate": 9.51900661602265e-07, "loss": 0.5355, "step": 11056 }, { "epoch": 1.6, "grad_norm": 6.316258430480957, "learning_rate": 9.517425628282278e-07, "loss": 0.5211, "step": 11057 }, { "epoch": 1.6, "grad_norm": 6.713664531707764, "learning_rate": 9.51584465263202e-07, "loss": 0.5464, "step": 11058 }, { "epoch": 1.6, "grad_norm": 6.724331855773926, "learning_rate": 9.514263689111492e-07, "loss": 0.5164, "step": 11059 }, { "epoch": 1.6, "grad_norm": 7.069874286651611, "learning_rate": 9.512682737760292e-07, "loss": 0.5445, "step": 11060 }, { "epoch": 1.6, "grad_norm": 6.641468048095703, "learning_rate": 9.51110179861804e-07, "loss": 0.4824, "step": 11061 }, { "epoch": 1.61, "grad_norm": 5.946991920471191, "learning_rate": 9.509520871724335e-07, "loss": 0.5067, "step": 11062 }, { "epoch": 1.61, "grad_norm": 7.3610334396362305, "learning_rate": 9.50793995711879e-07, "loss": 0.5046, "step": 11063 }, { "epoch": 1.61, "grad_norm": 6.938571929931641, "learning_rate": 9.506359054841006e-07, "loss": 0.5669, "step": 11064 }, { "epoch": 1.61, "grad_norm": 7.032840728759766, "learning_rate": 9.504778164930597e-07, "loss": 0.5699, "step": 11065 }, { "epoch": 1.61, "grad_norm": 6.166210174560547, "learning_rate": 9.503197287427163e-07, "loss": 0.5097, "step": 11066 }, { "epoch": 1.61, "grad_norm": 6.619221210479736, "learning_rate": 9.501616422370316e-07, "loss": 0.4911, "step": 11067 }, { "epoch": 1.61, "grad_norm": 6.207603931427002, "learning_rate": 9.50003556979966e-07, "loss": 0.503, "step": 11068 }, { "epoch": 1.61, "grad_norm": 7.466029644012451, "learning_rate": 9.498454729754799e-07, "loss": 0.5418, "step": 11069 }, { "epoch": 1.61, "grad_norm": 6.403709411621094, "learning_rate": 9.496873902275343e-07, "loss": 0.5196, "step": 11070 }, { "epoch": 1.61, "grad_norm": 6.43207311630249, "learning_rate": 9.49529308740089e-07, "loss": 0.5456, "step": 11071 }, { "epoch": 1.61, "grad_norm": 5.908441543579102, "learning_rate": 9.493712285171052e-07, "loss": 0.5404, "step": 11072 }, { "epoch": 1.61, "grad_norm": 6.949752330780029, "learning_rate": 9.492131495625426e-07, "loss": 0.526, "step": 11073 }, { "epoch": 1.61, "grad_norm": 7.284825801849365, "learning_rate": 9.490550718803624e-07, "loss": 0.6029, "step": 11074 }, { "epoch": 1.61, "grad_norm": 6.233062267303467, "learning_rate": 9.488969954745246e-07, "loss": 0.5648, "step": 11075 }, { "epoch": 1.61, "grad_norm": 5.8116583824157715, "learning_rate": 9.487389203489896e-07, "loss": 0.5436, "step": 11076 }, { "epoch": 1.61, "grad_norm": 6.214008331298828, "learning_rate": 9.485808465077171e-07, "loss": 0.5213, "step": 11077 }, { "epoch": 1.61, "grad_norm": 6.193724155426025, "learning_rate": 9.484227739546686e-07, "loss": 0.5148, "step": 11078 }, { "epoch": 1.61, "grad_norm": 6.430867671966553, "learning_rate": 9.482647026938032e-07, "loss": 0.5141, "step": 11079 }, { "epoch": 1.61, "grad_norm": 7.237696170806885, "learning_rate": 9.481066327290821e-07, "loss": 0.5396, "step": 11080 }, { "epoch": 1.61, "grad_norm": 6.477845668792725, "learning_rate": 9.479485640644644e-07, "loss": 0.5349, "step": 11081 }, { "epoch": 1.61, "grad_norm": 6.464302062988281, "learning_rate": 9.477904967039115e-07, "loss": 0.5056, "step": 11082 }, { "epoch": 1.61, "grad_norm": 6.336823463439941, "learning_rate": 9.476324306513825e-07, "loss": 0.519, "step": 11083 }, { "epoch": 1.61, "grad_norm": 6.919009685516357, "learning_rate": 9.47474365910838e-07, "loss": 0.511, "step": 11084 }, { "epoch": 1.61, "grad_norm": 5.819835186004639, "learning_rate": 9.473163024862376e-07, "loss": 0.4605, "step": 11085 }, { "epoch": 1.61, "grad_norm": 6.876998424530029, "learning_rate": 9.471582403815421e-07, "loss": 0.6066, "step": 11086 }, { "epoch": 1.61, "grad_norm": 6.501221656799316, "learning_rate": 9.470001796007106e-07, "loss": 0.5338, "step": 11087 }, { "epoch": 1.61, "grad_norm": 7.183300971984863, "learning_rate": 9.468421201477038e-07, "loss": 0.6098, "step": 11088 }, { "epoch": 1.61, "grad_norm": 6.1276960372924805, "learning_rate": 9.466840620264812e-07, "loss": 0.505, "step": 11089 }, { "epoch": 1.61, "grad_norm": 7.199135780334473, "learning_rate": 9.465260052410027e-07, "loss": 0.4909, "step": 11090 }, { "epoch": 1.61, "grad_norm": 6.259033203125, "learning_rate": 9.463679497952284e-07, "loss": 0.51, "step": 11091 }, { "epoch": 1.61, "grad_norm": 6.774545192718506, "learning_rate": 9.462098956931177e-07, "loss": 0.5271, "step": 11092 }, { "epoch": 1.61, "grad_norm": 6.3440842628479, "learning_rate": 9.46051842938631e-07, "loss": 0.4942, "step": 11093 }, { "epoch": 1.61, "grad_norm": 6.332381725311279, "learning_rate": 9.458937915357274e-07, "loss": 0.4767, "step": 11094 }, { "epoch": 1.61, "grad_norm": 6.488229274749756, "learning_rate": 9.457357414883673e-07, "loss": 0.4718, "step": 11095 }, { "epoch": 1.61, "grad_norm": 7.221517086029053, "learning_rate": 9.455776928005099e-07, "loss": 0.5625, "step": 11096 }, { "epoch": 1.61, "grad_norm": 6.166384220123291, "learning_rate": 9.454196454761151e-07, "loss": 0.4879, "step": 11097 }, { "epoch": 1.61, "grad_norm": 6.232700824737549, "learning_rate": 9.45261599519142e-07, "loss": 0.5425, "step": 11098 }, { "epoch": 1.61, "grad_norm": 6.189609527587891, "learning_rate": 9.45103554933551e-07, "loss": 0.489, "step": 11099 }, { "epoch": 1.61, "grad_norm": 6.35286808013916, "learning_rate": 9.449455117233009e-07, "loss": 0.5436, "step": 11100 }, { "epoch": 1.61, "grad_norm": 7.366237640380859, "learning_rate": 9.447874698923519e-07, "loss": 0.6069, "step": 11101 }, { "epoch": 1.61, "grad_norm": 7.336950302124023, "learning_rate": 9.446294294446626e-07, "loss": 0.5802, "step": 11102 }, { "epoch": 1.61, "grad_norm": 6.762840747833252, "learning_rate": 9.444713903841935e-07, "loss": 0.5355, "step": 11103 }, { "epoch": 1.61, "grad_norm": 6.545024871826172, "learning_rate": 9.443133527149034e-07, "loss": 0.5918, "step": 11104 }, { "epoch": 1.61, "grad_norm": 7.798702239990234, "learning_rate": 9.441553164407519e-07, "loss": 0.5885, "step": 11105 }, { "epoch": 1.61, "grad_norm": 7.354002952575684, "learning_rate": 9.439972815656977e-07, "loss": 0.5907, "step": 11106 }, { "epoch": 1.61, "grad_norm": 7.065256595611572, "learning_rate": 9.43839248093701e-07, "loss": 0.5739, "step": 11107 }, { "epoch": 1.61, "grad_norm": 6.289105415344238, "learning_rate": 9.436812160287205e-07, "loss": 0.4685, "step": 11108 }, { "epoch": 1.61, "grad_norm": 6.434062957763672, "learning_rate": 9.435231853747159e-07, "loss": 0.544, "step": 11109 }, { "epoch": 1.61, "grad_norm": 6.421985626220703, "learning_rate": 9.433651561356458e-07, "loss": 0.4456, "step": 11110 }, { "epoch": 1.61, "grad_norm": 6.4701032638549805, "learning_rate": 9.432071283154698e-07, "loss": 0.5174, "step": 11111 }, { "epoch": 1.61, "grad_norm": 6.8817830085754395, "learning_rate": 9.43049101918147e-07, "loss": 0.4779, "step": 11112 }, { "epoch": 1.61, "grad_norm": 6.432901382446289, "learning_rate": 9.428910769476366e-07, "loss": 0.5608, "step": 11113 }, { "epoch": 1.61, "grad_norm": 6.347121715545654, "learning_rate": 9.42733053407897e-07, "loss": 0.54, "step": 11114 }, { "epoch": 1.61, "grad_norm": 6.428431510925293, "learning_rate": 9.425750313028881e-07, "loss": 0.4963, "step": 11115 }, { "epoch": 1.61, "grad_norm": 6.012794494628906, "learning_rate": 9.424170106365681e-07, "loss": 0.5137, "step": 11116 }, { "epoch": 1.61, "grad_norm": 6.660874366760254, "learning_rate": 9.422589914128967e-07, "loss": 0.6147, "step": 11117 }, { "epoch": 1.61, "grad_norm": 6.5224127769470215, "learning_rate": 9.421009736358323e-07, "loss": 0.579, "step": 11118 }, { "epoch": 1.61, "grad_norm": 7.088024139404297, "learning_rate": 9.419429573093337e-07, "loss": 0.578, "step": 11119 }, { "epoch": 1.61, "grad_norm": 6.126925945281982, "learning_rate": 9.417849424373604e-07, "loss": 0.469, "step": 11120 }, { "epoch": 1.61, "grad_norm": 6.17840576171875, "learning_rate": 9.416269290238703e-07, "loss": 0.5461, "step": 11121 }, { "epoch": 1.61, "grad_norm": 6.337304592132568, "learning_rate": 9.414689170728231e-07, "loss": 0.4585, "step": 11122 }, { "epoch": 1.61, "grad_norm": 7.039618968963623, "learning_rate": 9.41310906588177e-07, "loss": 0.5956, "step": 11123 }, { "epoch": 1.61, "grad_norm": 7.079921722412109, "learning_rate": 9.411528975738906e-07, "loss": 0.5813, "step": 11124 }, { "epoch": 1.61, "grad_norm": 6.200289726257324, "learning_rate": 9.409948900339229e-07, "loss": 0.5319, "step": 11125 }, { "epoch": 1.61, "grad_norm": 6.52766752243042, "learning_rate": 9.408368839722324e-07, "loss": 0.5431, "step": 11126 }, { "epoch": 1.61, "grad_norm": 7.103618144989014, "learning_rate": 9.406788793927773e-07, "loss": 0.5791, "step": 11127 }, { "epoch": 1.61, "grad_norm": 7.16490364074707, "learning_rate": 9.405208762995169e-07, "loss": 0.5391, "step": 11128 }, { "epoch": 1.61, "grad_norm": 7.189971446990967, "learning_rate": 9.403628746964088e-07, "loss": 0.6557, "step": 11129 }, { "epoch": 1.61, "grad_norm": 6.48034143447876, "learning_rate": 9.402048745874126e-07, "loss": 0.4964, "step": 11130 }, { "epoch": 1.62, "grad_norm": 6.719052791595459, "learning_rate": 9.400468759764858e-07, "loss": 0.4809, "step": 11131 }, { "epoch": 1.62, "grad_norm": 6.105070114135742, "learning_rate": 9.398888788675872e-07, "loss": 0.5012, "step": 11132 }, { "epoch": 1.62, "grad_norm": 6.616537094116211, "learning_rate": 9.397308832646748e-07, "loss": 0.5048, "step": 11133 }, { "epoch": 1.62, "grad_norm": 6.164754867553711, "learning_rate": 9.395728891717075e-07, "loss": 0.5599, "step": 11134 }, { "epoch": 1.62, "grad_norm": 6.315562725067139, "learning_rate": 9.394148965926429e-07, "loss": 0.4727, "step": 11135 }, { "epoch": 1.62, "grad_norm": 6.493617057800293, "learning_rate": 9.392569055314401e-07, "loss": 0.5528, "step": 11136 }, { "epoch": 1.62, "grad_norm": 5.773064136505127, "learning_rate": 9.390989159920563e-07, "loss": 0.4605, "step": 11137 }, { "epoch": 1.62, "grad_norm": 7.52760648727417, "learning_rate": 9.389409279784508e-07, "loss": 0.5686, "step": 11138 }, { "epoch": 1.62, "grad_norm": 6.737869739532471, "learning_rate": 9.387829414945808e-07, "loss": 0.5574, "step": 11139 }, { "epoch": 1.62, "grad_norm": 7.483359336853027, "learning_rate": 9.38624956544405e-07, "loss": 0.6662, "step": 11140 }, { "epoch": 1.62, "grad_norm": 6.743470191955566, "learning_rate": 9.384669731318807e-07, "loss": 0.5539, "step": 11141 }, { "epoch": 1.62, "grad_norm": 7.127442359924316, "learning_rate": 9.383089912609669e-07, "loss": 0.5571, "step": 11142 }, { "epoch": 1.62, "grad_norm": 7.284930229187012, "learning_rate": 9.381510109356207e-07, "loss": 0.5935, "step": 11143 }, { "epoch": 1.62, "grad_norm": 7.469302654266357, "learning_rate": 9.379930321598005e-07, "loss": 0.5363, "step": 11144 }, { "epoch": 1.62, "grad_norm": 6.134698390960693, "learning_rate": 9.378350549374646e-07, "loss": 0.5693, "step": 11145 }, { "epoch": 1.62, "grad_norm": 7.037693500518799, "learning_rate": 9.376770792725697e-07, "loss": 0.5744, "step": 11146 }, { "epoch": 1.62, "grad_norm": 6.4941277503967285, "learning_rate": 9.375191051690747e-07, "loss": 0.5497, "step": 11147 }, { "epoch": 1.62, "grad_norm": 7.606416702270508, "learning_rate": 9.373611326309368e-07, "loss": 0.5845, "step": 11148 }, { "epoch": 1.62, "grad_norm": 7.703206539154053, "learning_rate": 9.372031616621143e-07, "loss": 0.5447, "step": 11149 }, { "epoch": 1.62, "grad_norm": 6.795352935791016, "learning_rate": 9.37045192266564e-07, "loss": 0.5961, "step": 11150 }, { "epoch": 1.62, "grad_norm": 6.37642765045166, "learning_rate": 9.368872244482447e-07, "loss": 0.5669, "step": 11151 }, { "epoch": 1.62, "grad_norm": 7.612519264221191, "learning_rate": 9.367292582111131e-07, "loss": 0.5397, "step": 11152 }, { "epoch": 1.62, "grad_norm": 6.867259502410889, "learning_rate": 9.365712935591274e-07, "loss": 0.5183, "step": 11153 }, { "epoch": 1.62, "grad_norm": 7.212253093719482, "learning_rate": 9.364133304962445e-07, "loss": 0.6294, "step": 11154 }, { "epoch": 1.62, "grad_norm": 6.742476940155029, "learning_rate": 9.362553690264227e-07, "loss": 0.5659, "step": 11155 }, { "epoch": 1.62, "grad_norm": 6.765030860900879, "learning_rate": 9.360974091536186e-07, "loss": 0.5391, "step": 11156 }, { "epoch": 1.62, "grad_norm": 7.0405402183532715, "learning_rate": 9.359394508817905e-07, "loss": 0.5632, "step": 11157 }, { "epoch": 1.62, "grad_norm": 6.190103054046631, "learning_rate": 9.357814942148951e-07, "loss": 0.5182, "step": 11158 }, { "epoch": 1.62, "grad_norm": 5.935014247894287, "learning_rate": 9.356235391568902e-07, "loss": 0.5065, "step": 11159 }, { "epoch": 1.62, "grad_norm": 6.333490371704102, "learning_rate": 9.354655857117329e-07, "loss": 0.5829, "step": 11160 }, { "epoch": 1.62, "grad_norm": 6.167308330535889, "learning_rate": 9.353076338833805e-07, "loss": 0.4729, "step": 11161 }, { "epoch": 1.62, "grad_norm": 6.225687026977539, "learning_rate": 9.3514968367579e-07, "loss": 0.4833, "step": 11162 }, { "epoch": 1.62, "grad_norm": 5.737827301025391, "learning_rate": 9.34991735092919e-07, "loss": 0.5152, "step": 11163 }, { "epoch": 1.62, "grad_norm": 6.852575302124023, "learning_rate": 9.348337881387243e-07, "loss": 0.5257, "step": 11164 }, { "epoch": 1.62, "grad_norm": 7.353794097900391, "learning_rate": 9.346758428171633e-07, "loss": 0.5639, "step": 11165 }, { "epoch": 1.62, "grad_norm": 6.982980251312256, "learning_rate": 9.34517899132193e-07, "loss": 0.5671, "step": 11166 }, { "epoch": 1.62, "grad_norm": 8.235060691833496, "learning_rate": 9.343599570877704e-07, "loss": 0.489, "step": 11167 }, { "epoch": 1.62, "grad_norm": 6.306457042694092, "learning_rate": 9.342020166878519e-07, "loss": 0.5183, "step": 11168 }, { "epoch": 1.62, "grad_norm": 8.063907623291016, "learning_rate": 9.340440779363956e-07, "loss": 0.5757, "step": 11169 }, { "epoch": 1.62, "grad_norm": 6.721061706542969, "learning_rate": 9.338861408373572e-07, "loss": 0.5174, "step": 11170 }, { "epoch": 1.62, "grad_norm": 6.084771633148193, "learning_rate": 9.337282053946942e-07, "loss": 0.5454, "step": 11171 }, { "epoch": 1.62, "grad_norm": 5.9890594482421875, "learning_rate": 9.335702716123635e-07, "loss": 0.5518, "step": 11172 }, { "epoch": 1.62, "grad_norm": 6.954228401184082, "learning_rate": 9.334123394943219e-07, "loss": 0.5787, "step": 11173 }, { "epoch": 1.62, "grad_norm": 5.8240647315979, "learning_rate": 9.332544090445259e-07, "loss": 0.5133, "step": 11174 }, { "epoch": 1.62, "grad_norm": 5.820871353149414, "learning_rate": 9.330964802669318e-07, "loss": 0.4912, "step": 11175 }, { "epoch": 1.62, "grad_norm": 7.004462242126465, "learning_rate": 9.329385531654972e-07, "loss": 0.5645, "step": 11176 }, { "epoch": 1.62, "grad_norm": 7.1228837966918945, "learning_rate": 9.327806277441776e-07, "loss": 0.5275, "step": 11177 }, { "epoch": 1.62, "grad_norm": 7.192093849182129, "learning_rate": 9.326227040069308e-07, "loss": 0.5865, "step": 11178 }, { "epoch": 1.62, "grad_norm": 5.892540454864502, "learning_rate": 9.324647819577124e-07, "loss": 0.494, "step": 11179 }, { "epoch": 1.62, "grad_norm": 7.00681209564209, "learning_rate": 9.323068616004793e-07, "loss": 0.5211, "step": 11180 }, { "epoch": 1.62, "grad_norm": 8.148612022399902, "learning_rate": 9.321489429391873e-07, "loss": 0.6311, "step": 11181 }, { "epoch": 1.62, "grad_norm": 6.393671989440918, "learning_rate": 9.319910259777939e-07, "loss": 0.5562, "step": 11182 }, { "epoch": 1.62, "grad_norm": 6.457668781280518, "learning_rate": 9.318331107202543e-07, "loss": 0.6019, "step": 11183 }, { "epoch": 1.62, "grad_norm": 6.0349860191345215, "learning_rate": 9.316751971705258e-07, "loss": 0.503, "step": 11184 }, { "epoch": 1.62, "grad_norm": 6.064414024353027, "learning_rate": 9.315172853325638e-07, "loss": 0.5381, "step": 11185 }, { "epoch": 1.62, "grad_norm": 6.783163070678711, "learning_rate": 9.313593752103254e-07, "loss": 0.5032, "step": 11186 }, { "epoch": 1.62, "grad_norm": 6.544008255004883, "learning_rate": 9.31201466807766e-07, "loss": 0.5317, "step": 11187 }, { "epoch": 1.62, "grad_norm": 7.054596424102783, "learning_rate": 9.310435601288424e-07, "loss": 0.5785, "step": 11188 }, { "epoch": 1.62, "grad_norm": 6.720785140991211, "learning_rate": 9.3088565517751e-07, "loss": 0.5074, "step": 11189 }, { "epoch": 1.62, "grad_norm": 6.517723083496094, "learning_rate": 9.307277519577255e-07, "loss": 0.5482, "step": 11190 }, { "epoch": 1.62, "grad_norm": 6.101729393005371, "learning_rate": 9.305698504734441e-07, "loss": 0.4596, "step": 11191 }, { "epoch": 1.62, "grad_norm": 7.711059093475342, "learning_rate": 9.304119507286229e-07, "loss": 0.6227, "step": 11192 }, { "epoch": 1.62, "grad_norm": 6.069972991943359, "learning_rate": 9.302540527272167e-07, "loss": 0.4813, "step": 11193 }, { "epoch": 1.62, "grad_norm": 6.122331142425537, "learning_rate": 9.300961564731823e-07, "loss": 0.525, "step": 11194 }, { "epoch": 1.62, "grad_norm": 7.1470465660095215, "learning_rate": 9.299382619704748e-07, "loss": 0.5157, "step": 11195 }, { "epoch": 1.62, "grad_norm": 8.436793327331543, "learning_rate": 9.297803692230504e-07, "loss": 0.5559, "step": 11196 }, { "epoch": 1.62, "grad_norm": 6.590507507324219, "learning_rate": 9.29622478234865e-07, "loss": 0.526, "step": 11197 }, { "epoch": 1.62, "grad_norm": 5.795840263366699, "learning_rate": 9.294645890098736e-07, "loss": 0.5214, "step": 11198 }, { "epoch": 1.62, "grad_norm": 6.439801216125488, "learning_rate": 9.293067015520328e-07, "loss": 0.5255, "step": 11199 }, { "epoch": 1.63, "grad_norm": 6.7411932945251465, "learning_rate": 9.291488158652974e-07, "loss": 0.5277, "step": 11200 }, { "epoch": 1.63, "grad_norm": 7.02805233001709, "learning_rate": 9.289909319536235e-07, "loss": 0.5606, "step": 11201 }, { "epoch": 1.63, "grad_norm": 6.602179050445557, "learning_rate": 9.288330498209661e-07, "loss": 0.5897, "step": 11202 }, { "epoch": 1.63, "grad_norm": 6.248103141784668, "learning_rate": 9.286751694712814e-07, "loss": 0.5594, "step": 11203 }, { "epoch": 1.63, "grad_norm": 6.711710453033447, "learning_rate": 9.285172909085242e-07, "loss": 0.5248, "step": 11204 }, { "epoch": 1.63, "grad_norm": 6.580691814422607, "learning_rate": 9.283594141366503e-07, "loss": 0.5611, "step": 11205 }, { "epoch": 1.63, "grad_norm": 6.858890056610107, "learning_rate": 9.282015391596145e-07, "loss": 0.4676, "step": 11206 }, { "epoch": 1.63, "grad_norm": 5.917912006378174, "learning_rate": 9.28043665981373e-07, "loss": 0.5534, "step": 11207 }, { "epoch": 1.63, "grad_norm": 6.674899578094482, "learning_rate": 9.278857946058802e-07, "loss": 0.5486, "step": 11208 }, { "epoch": 1.63, "grad_norm": 6.644631862640381, "learning_rate": 9.27727925037092e-07, "loss": 0.5031, "step": 11209 }, { "epoch": 1.63, "grad_norm": 6.6779656410217285, "learning_rate": 9.275700572789626e-07, "loss": 0.562, "step": 11210 }, { "epoch": 1.63, "grad_norm": 5.69590950012207, "learning_rate": 9.274121913354482e-07, "loss": 0.5104, "step": 11211 }, { "epoch": 1.63, "grad_norm": 7.2165961265563965, "learning_rate": 9.272543272105032e-07, "loss": 0.5665, "step": 11212 }, { "epoch": 1.63, "grad_norm": 6.407463550567627, "learning_rate": 9.27096464908083e-07, "loss": 0.5074, "step": 11213 }, { "epoch": 1.63, "grad_norm": 6.494476318359375, "learning_rate": 9.269386044321423e-07, "loss": 0.5153, "step": 11214 }, { "epoch": 1.63, "grad_norm": 6.374520778656006, "learning_rate": 9.267807457866361e-07, "loss": 0.5105, "step": 11215 }, { "epoch": 1.63, "grad_norm": 6.536145210266113, "learning_rate": 9.266228889755193e-07, "loss": 0.5179, "step": 11216 }, { "epoch": 1.63, "grad_norm": 7.1742939949035645, "learning_rate": 9.264650340027471e-07, "loss": 0.5523, "step": 11217 }, { "epoch": 1.63, "grad_norm": 6.490302085876465, "learning_rate": 9.263071808722736e-07, "loss": 0.5362, "step": 11218 }, { "epoch": 1.63, "grad_norm": 6.286928176879883, "learning_rate": 9.261493295880543e-07, "loss": 0.5337, "step": 11219 }, { "epoch": 1.63, "grad_norm": 7.056689739227295, "learning_rate": 9.259914801540431e-07, "loss": 0.5344, "step": 11220 }, { "epoch": 1.63, "grad_norm": 6.160282611846924, "learning_rate": 9.258336325741957e-07, "loss": 0.4865, "step": 11221 }, { "epoch": 1.63, "grad_norm": 6.58102560043335, "learning_rate": 9.256757868524658e-07, "loss": 0.6249, "step": 11222 }, { "epoch": 1.63, "grad_norm": 6.698080539703369, "learning_rate": 9.255179429928081e-07, "loss": 0.6165, "step": 11223 }, { "epoch": 1.63, "grad_norm": 5.965936183929443, "learning_rate": 9.253601009991777e-07, "loss": 0.5577, "step": 11224 }, { "epoch": 1.63, "grad_norm": 6.732145309448242, "learning_rate": 9.252022608755283e-07, "loss": 0.5268, "step": 11225 }, { "epoch": 1.63, "grad_norm": 6.064966201782227, "learning_rate": 9.250444226258152e-07, "loss": 0.5199, "step": 11226 }, { "epoch": 1.63, "grad_norm": 6.508670806884766, "learning_rate": 9.24886586253992e-07, "loss": 0.5439, "step": 11227 }, { "epoch": 1.63, "grad_norm": 6.1178789138793945, "learning_rate": 9.247287517640134e-07, "loss": 0.4782, "step": 11228 }, { "epoch": 1.63, "grad_norm": 6.528371810913086, "learning_rate": 9.245709191598335e-07, "loss": 0.545, "step": 11229 }, { "epoch": 1.63, "grad_norm": 6.69594669342041, "learning_rate": 9.24413088445407e-07, "loss": 0.5148, "step": 11230 }, { "epoch": 1.63, "grad_norm": 6.261814594268799, "learning_rate": 9.242552596246873e-07, "loss": 0.5482, "step": 11231 }, { "epoch": 1.63, "grad_norm": 8.270356178283691, "learning_rate": 9.240974327016294e-07, "loss": 0.5631, "step": 11232 }, { "epoch": 1.63, "grad_norm": 6.979617118835449, "learning_rate": 9.239396076801867e-07, "loss": 0.5492, "step": 11233 }, { "epoch": 1.63, "grad_norm": 5.853080749511719, "learning_rate": 9.237817845643137e-07, "loss": 0.5249, "step": 11234 }, { "epoch": 1.63, "grad_norm": 7.03345251083374, "learning_rate": 9.236239633579642e-07, "loss": 0.5561, "step": 11235 }, { "epoch": 1.63, "grad_norm": 6.84084939956665, "learning_rate": 9.234661440650925e-07, "loss": 0.5926, "step": 11236 }, { "epoch": 1.63, "grad_norm": 5.932374477386475, "learning_rate": 9.233083266896516e-07, "loss": 0.4724, "step": 11237 }, { "epoch": 1.63, "grad_norm": 5.867280006408691, "learning_rate": 9.231505112355966e-07, "loss": 0.5776, "step": 11238 }, { "epoch": 1.63, "grad_norm": 7.064199924468994, "learning_rate": 9.2299269770688e-07, "loss": 0.6449, "step": 11239 }, { "epoch": 1.63, "grad_norm": 5.568716526031494, "learning_rate": 9.228348861074568e-07, "loss": 0.4937, "step": 11240 }, { "epoch": 1.63, "grad_norm": 6.582559108734131, "learning_rate": 9.226770764412796e-07, "loss": 0.5699, "step": 11241 }, { "epoch": 1.63, "grad_norm": 6.500956058502197, "learning_rate": 9.225192687123032e-07, "loss": 0.5162, "step": 11242 }, { "epoch": 1.63, "grad_norm": 7.357083320617676, "learning_rate": 9.223614629244804e-07, "loss": 0.5677, "step": 11243 }, { "epoch": 1.63, "grad_norm": 6.516482353210449, "learning_rate": 9.222036590817652e-07, "loss": 0.5351, "step": 11244 }, { "epoch": 1.63, "grad_norm": 6.312868118286133, "learning_rate": 9.220458571881105e-07, "loss": 0.5916, "step": 11245 }, { "epoch": 1.63, "grad_norm": 7.003152370452881, "learning_rate": 9.218880572474705e-07, "loss": 0.5506, "step": 11246 }, { "epoch": 1.63, "grad_norm": 6.928557395935059, "learning_rate": 9.217302592637979e-07, "loss": 0.5495, "step": 11247 }, { "epoch": 1.63, "grad_norm": 7.0637993812561035, "learning_rate": 9.21572463241047e-07, "loss": 0.5581, "step": 11248 }, { "epoch": 1.63, "grad_norm": 6.515810966491699, "learning_rate": 9.214146691831704e-07, "loss": 0.5596, "step": 11249 }, { "epoch": 1.63, "grad_norm": 6.506489276885986, "learning_rate": 9.212568770941213e-07, "loss": 0.5142, "step": 11250 }, { "epoch": 1.63, "grad_norm": 7.001704216003418, "learning_rate": 9.210990869778535e-07, "loss": 0.5232, "step": 11251 }, { "epoch": 1.63, "grad_norm": 7.07228422164917, "learning_rate": 9.209412988383197e-07, "loss": 0.5453, "step": 11252 }, { "epoch": 1.63, "grad_norm": 7.094634532928467, "learning_rate": 9.207835126794735e-07, "loss": 0.5523, "step": 11253 }, { "epoch": 1.63, "grad_norm": 6.313348770141602, "learning_rate": 9.206257285052672e-07, "loss": 0.4682, "step": 11254 }, { "epoch": 1.63, "grad_norm": 7.075217247009277, "learning_rate": 9.204679463196549e-07, "loss": 0.528, "step": 11255 }, { "epoch": 1.63, "grad_norm": 7.118937015533447, "learning_rate": 9.203101661265886e-07, "loss": 0.5671, "step": 11256 }, { "epoch": 1.63, "grad_norm": 7.312260150909424, "learning_rate": 9.201523879300219e-07, "loss": 0.5352, "step": 11257 }, { "epoch": 1.63, "grad_norm": 6.0069379806518555, "learning_rate": 9.19994611733907e-07, "loss": 0.5431, "step": 11258 }, { "epoch": 1.63, "grad_norm": 7.156398296356201, "learning_rate": 9.198368375421973e-07, "loss": 0.5281, "step": 11259 }, { "epoch": 1.63, "grad_norm": 7.874503135681152, "learning_rate": 9.196790653588452e-07, "loss": 0.6078, "step": 11260 }, { "epoch": 1.63, "grad_norm": 6.275465965270996, "learning_rate": 9.195212951878039e-07, "loss": 0.5883, "step": 11261 }, { "epoch": 1.63, "grad_norm": 6.503236293792725, "learning_rate": 9.193635270330255e-07, "loss": 0.5476, "step": 11262 }, { "epoch": 1.63, "grad_norm": 7.024585723876953, "learning_rate": 9.192057608984633e-07, "loss": 0.5998, "step": 11263 }, { "epoch": 1.63, "grad_norm": 7.316483020782471, "learning_rate": 9.190479967880691e-07, "loss": 0.6098, "step": 11264 }, { "epoch": 1.63, "grad_norm": 7.874190330505371, "learning_rate": 9.188902347057962e-07, "loss": 0.5321, "step": 11265 }, { "epoch": 1.63, "grad_norm": 5.841463565826416, "learning_rate": 9.187324746555962e-07, "loss": 0.4393, "step": 11266 }, { "epoch": 1.63, "grad_norm": 6.903000354766846, "learning_rate": 9.185747166414224e-07, "loss": 0.5243, "step": 11267 }, { "epoch": 1.63, "grad_norm": 7.366436958312988, "learning_rate": 9.184169606672264e-07, "loss": 0.6314, "step": 11268 }, { "epoch": 1.64, "grad_norm": 6.300002574920654, "learning_rate": 9.182592067369612e-07, "loss": 0.5152, "step": 11269 }, { "epoch": 1.64, "grad_norm": 6.093512535095215, "learning_rate": 9.181014548545787e-07, "loss": 0.5372, "step": 11270 }, { "epoch": 1.64, "grad_norm": 5.774624824523926, "learning_rate": 9.17943705024031e-07, "loss": 0.5121, "step": 11271 }, { "epoch": 1.64, "grad_norm": 5.954652309417725, "learning_rate": 9.177859572492707e-07, "loss": 0.4921, "step": 11272 }, { "epoch": 1.64, "grad_norm": 6.307178020477295, "learning_rate": 9.176282115342498e-07, "loss": 0.6028, "step": 11273 }, { "epoch": 1.64, "grad_norm": 6.976112365722656, "learning_rate": 9.174704678829196e-07, "loss": 0.5175, "step": 11274 }, { "epoch": 1.64, "grad_norm": 6.5404253005981445, "learning_rate": 9.173127262992328e-07, "loss": 0.5106, "step": 11275 }, { "epoch": 1.64, "grad_norm": 6.645050525665283, "learning_rate": 9.171549867871419e-07, "loss": 0.483, "step": 11276 }, { "epoch": 1.64, "grad_norm": 8.08030891418457, "learning_rate": 9.169972493505978e-07, "loss": 0.5476, "step": 11277 }, { "epoch": 1.64, "grad_norm": 5.788215160369873, "learning_rate": 9.16839513993553e-07, "loss": 0.5488, "step": 11278 }, { "epoch": 1.64, "grad_norm": 6.853733062744141, "learning_rate": 9.166817807199587e-07, "loss": 0.5301, "step": 11279 }, { "epoch": 1.64, "grad_norm": 6.4253458976745605, "learning_rate": 9.165240495337674e-07, "loss": 0.488, "step": 11280 }, { "epoch": 1.64, "grad_norm": 6.21142578125, "learning_rate": 9.1636632043893e-07, "loss": 0.5247, "step": 11281 }, { "epoch": 1.64, "grad_norm": 6.175763130187988, "learning_rate": 9.162085934393988e-07, "loss": 0.5032, "step": 11282 }, { "epoch": 1.64, "grad_norm": 6.73546028137207, "learning_rate": 9.16050868539125e-07, "loss": 0.5545, "step": 11283 }, { "epoch": 1.64, "grad_norm": 6.332516193389893, "learning_rate": 9.158931457420605e-07, "loss": 0.5572, "step": 11284 }, { "epoch": 1.64, "grad_norm": 6.056460857391357, "learning_rate": 9.157354250521564e-07, "loss": 0.4633, "step": 11285 }, { "epoch": 1.64, "grad_norm": 6.662850379943848, "learning_rate": 9.155777064733646e-07, "loss": 0.4975, "step": 11286 }, { "epoch": 1.64, "grad_norm": 6.597140312194824, "learning_rate": 9.154199900096357e-07, "loss": 0.5182, "step": 11287 }, { "epoch": 1.64, "grad_norm": 6.387645721435547, "learning_rate": 9.152622756649218e-07, "loss": 0.5616, "step": 11288 }, { "epoch": 1.64, "grad_norm": 7.946151256561279, "learning_rate": 9.151045634431737e-07, "loss": 0.52, "step": 11289 }, { "epoch": 1.64, "grad_norm": 6.24813985824585, "learning_rate": 9.149468533483432e-07, "loss": 0.5038, "step": 11290 }, { "epoch": 1.64, "grad_norm": 6.229738235473633, "learning_rate": 9.147891453843809e-07, "loss": 0.59, "step": 11291 }, { "epoch": 1.64, "grad_norm": 6.014469623565674, "learning_rate": 9.146314395552383e-07, "loss": 0.5293, "step": 11292 }, { "epoch": 1.64, "grad_norm": 6.960180759429932, "learning_rate": 9.144737358648657e-07, "loss": 0.4825, "step": 11293 }, { "epoch": 1.64, "grad_norm": 6.871638298034668, "learning_rate": 9.143160343172153e-07, "loss": 0.5402, "step": 11294 }, { "epoch": 1.64, "grad_norm": 6.8299031257629395, "learning_rate": 9.141583349162369e-07, "loss": 0.5324, "step": 11295 }, { "epoch": 1.64, "grad_norm": 6.045650482177734, "learning_rate": 9.140006376658825e-07, "loss": 0.5127, "step": 11296 }, { "epoch": 1.64, "grad_norm": 6.385275363922119, "learning_rate": 9.138429425701018e-07, "loss": 0.5446, "step": 11297 }, { "epoch": 1.64, "grad_norm": 5.971375942230225, "learning_rate": 9.136852496328468e-07, "loss": 0.4615, "step": 11298 }, { "epoch": 1.64, "grad_norm": 7.260488986968994, "learning_rate": 9.135275588580673e-07, "loss": 0.482, "step": 11299 }, { "epoch": 1.64, "grad_norm": 6.6857147216796875, "learning_rate": 9.133698702497146e-07, "loss": 0.5609, "step": 11300 }, { "epoch": 1.64, "grad_norm": 5.716835021972656, "learning_rate": 9.132121838117385e-07, "loss": 0.4959, "step": 11301 }, { "epoch": 1.64, "grad_norm": 6.039095878601074, "learning_rate": 9.130544995480901e-07, "loss": 0.5061, "step": 11302 }, { "epoch": 1.64, "grad_norm": 6.175858020782471, "learning_rate": 9.128968174627205e-07, "loss": 0.5188, "step": 11303 }, { "epoch": 1.64, "grad_norm": 5.767770290374756, "learning_rate": 9.127391375595793e-07, "loss": 0.5075, "step": 11304 }, { "epoch": 1.64, "grad_norm": 6.861833572387695, "learning_rate": 9.125814598426175e-07, "loss": 0.4788, "step": 11305 }, { "epoch": 1.64, "grad_norm": 6.14913272857666, "learning_rate": 9.124237843157847e-07, "loss": 0.545, "step": 11306 }, { "epoch": 1.64, "grad_norm": 6.085928916931152, "learning_rate": 9.122661109830321e-07, "loss": 0.5034, "step": 11307 }, { "epoch": 1.64, "grad_norm": 7.161045074462891, "learning_rate": 9.121084398483092e-07, "loss": 0.514, "step": 11308 }, { "epoch": 1.64, "grad_norm": 6.322582244873047, "learning_rate": 9.119507709155669e-07, "loss": 0.5819, "step": 11309 }, { "epoch": 1.64, "grad_norm": 6.562928199768066, "learning_rate": 9.117931041887546e-07, "loss": 0.4922, "step": 11310 }, { "epoch": 1.64, "grad_norm": 7.538418769836426, "learning_rate": 9.116354396718231e-07, "loss": 0.4642, "step": 11311 }, { "epoch": 1.64, "grad_norm": 6.288376808166504, "learning_rate": 9.114777773687218e-07, "loss": 0.5725, "step": 11312 }, { "epoch": 1.64, "grad_norm": 7.245666027069092, "learning_rate": 9.113201172834013e-07, "loss": 0.6052, "step": 11313 }, { "epoch": 1.64, "grad_norm": 6.292091369628906, "learning_rate": 9.111624594198107e-07, "loss": 0.529, "step": 11314 }, { "epoch": 1.64, "grad_norm": 5.8362298011779785, "learning_rate": 9.110048037819008e-07, "loss": 0.5406, "step": 11315 }, { "epoch": 1.64, "grad_norm": 6.535270690917969, "learning_rate": 9.108471503736205e-07, "loss": 0.4825, "step": 11316 }, { "epoch": 1.64, "grad_norm": 8.101104736328125, "learning_rate": 9.106894991989203e-07, "loss": 0.4985, "step": 11317 }, { "epoch": 1.64, "grad_norm": 6.920055389404297, "learning_rate": 9.105318502617495e-07, "loss": 0.4945, "step": 11318 }, { "epoch": 1.64, "grad_norm": 5.886148452758789, "learning_rate": 9.103742035660577e-07, "loss": 0.4863, "step": 11319 }, { "epoch": 1.64, "grad_norm": 6.993255615234375, "learning_rate": 9.102165591157948e-07, "loss": 0.6132, "step": 11320 }, { "epoch": 1.64, "grad_norm": 6.427761554718018, "learning_rate": 9.100589169149102e-07, "loss": 0.4947, "step": 11321 }, { "epoch": 1.64, "grad_norm": 6.289068222045898, "learning_rate": 9.09901276967353e-07, "loss": 0.4963, "step": 11322 }, { "epoch": 1.64, "grad_norm": 6.708484172821045, "learning_rate": 9.097436392770732e-07, "loss": 0.5925, "step": 11323 }, { "epoch": 1.64, "grad_norm": 6.592246055603027, "learning_rate": 9.095860038480194e-07, "loss": 0.5881, "step": 11324 }, { "epoch": 1.64, "grad_norm": 6.578897953033447, "learning_rate": 9.094283706841419e-07, "loss": 0.512, "step": 11325 }, { "epoch": 1.64, "grad_norm": 5.888971328735352, "learning_rate": 9.092707397893891e-07, "loss": 0.5056, "step": 11326 }, { "epoch": 1.64, "grad_norm": 8.498394966125488, "learning_rate": 9.091131111677105e-07, "loss": 0.6982, "step": 11327 }, { "epoch": 1.64, "grad_norm": 6.777777671813965, "learning_rate": 9.089554848230555e-07, "loss": 0.555, "step": 11328 }, { "epoch": 1.64, "grad_norm": 6.364279747009277, "learning_rate": 9.087978607593725e-07, "loss": 0.5077, "step": 11329 }, { "epoch": 1.64, "grad_norm": 6.086674213409424, "learning_rate": 9.086402389806112e-07, "loss": 0.5743, "step": 11330 }, { "epoch": 1.64, "grad_norm": 6.058693885803223, "learning_rate": 9.0848261949072e-07, "loss": 0.537, "step": 11331 }, { "epoch": 1.64, "grad_norm": 6.219287872314453, "learning_rate": 9.083250022936484e-07, "loss": 0.5103, "step": 11332 }, { "epoch": 1.64, "grad_norm": 6.345791339874268, "learning_rate": 9.081673873933447e-07, "loss": 0.5334, "step": 11333 }, { "epoch": 1.64, "grad_norm": 5.838837146759033, "learning_rate": 9.080097747937583e-07, "loss": 0.4759, "step": 11334 }, { "epoch": 1.64, "grad_norm": 6.711132526397705, "learning_rate": 9.078521644988368e-07, "loss": 0.5963, "step": 11335 }, { "epoch": 1.64, "grad_norm": 6.561613082885742, "learning_rate": 9.076945565125303e-07, "loss": 0.5222, "step": 11336 }, { "epoch": 1.64, "grad_norm": 7.017599582672119, "learning_rate": 9.075369508387861e-07, "loss": 0.5036, "step": 11337 }, { "epoch": 1.65, "grad_norm": 6.49415397644043, "learning_rate": 9.073793474815538e-07, "loss": 0.5163, "step": 11338 }, { "epoch": 1.65, "grad_norm": 6.12205696105957, "learning_rate": 9.072217464447813e-07, "loss": 0.5455, "step": 11339 }, { "epoch": 1.65, "grad_norm": 6.043797492980957, "learning_rate": 9.070641477324172e-07, "loss": 0.4853, "step": 11340 }, { "epoch": 1.65, "grad_norm": 6.233927249908447, "learning_rate": 9.069065513484098e-07, "loss": 0.5099, "step": 11341 }, { "epoch": 1.65, "grad_norm": 7.263830184936523, "learning_rate": 9.067489572967079e-07, "loss": 0.5172, "step": 11342 }, { "epoch": 1.65, "grad_norm": 5.907618999481201, "learning_rate": 9.065913655812587e-07, "loss": 0.5216, "step": 11343 }, { "epoch": 1.65, "grad_norm": 6.442469596862793, "learning_rate": 9.064337762060116e-07, "loss": 0.5699, "step": 11344 }, { "epoch": 1.65, "grad_norm": 6.914044380187988, "learning_rate": 9.062761891749138e-07, "loss": 0.6008, "step": 11345 }, { "epoch": 1.65, "grad_norm": 6.245284557342529, "learning_rate": 9.061186044919141e-07, "loss": 0.4749, "step": 11346 }, { "epoch": 1.65, "grad_norm": 7.368212699890137, "learning_rate": 9.059610221609601e-07, "loss": 0.5578, "step": 11347 }, { "epoch": 1.65, "grad_norm": 6.9194817543029785, "learning_rate": 9.058034421860001e-07, "loss": 0.596, "step": 11348 }, { "epoch": 1.65, "grad_norm": 6.784844875335693, "learning_rate": 9.056458645709814e-07, "loss": 0.5367, "step": 11349 }, { "epoch": 1.65, "grad_norm": 6.0937724113464355, "learning_rate": 9.054882893198527e-07, "loss": 0.5371, "step": 11350 }, { "epoch": 1.65, "grad_norm": 6.800149440765381, "learning_rate": 9.053307164365608e-07, "loss": 0.5447, "step": 11351 }, { "epoch": 1.65, "grad_norm": 6.9369215965271, "learning_rate": 9.051731459250544e-07, "loss": 0.6052, "step": 11352 }, { "epoch": 1.65, "grad_norm": 5.81428861618042, "learning_rate": 9.050155777892805e-07, "loss": 0.5388, "step": 11353 }, { "epoch": 1.65, "grad_norm": 6.447391986846924, "learning_rate": 9.048580120331871e-07, "loss": 0.5063, "step": 11354 }, { "epoch": 1.65, "grad_norm": 6.305027484893799, "learning_rate": 9.047004486607218e-07, "loss": 0.5581, "step": 11355 }, { "epoch": 1.65, "grad_norm": 6.347402572631836, "learning_rate": 9.045428876758315e-07, "loss": 0.4751, "step": 11356 }, { "epoch": 1.65, "grad_norm": 6.771570682525635, "learning_rate": 9.043853290824645e-07, "loss": 0.4566, "step": 11357 }, { "epoch": 1.65, "grad_norm": 6.325990676879883, "learning_rate": 9.042277728845672e-07, "loss": 0.6053, "step": 11358 }, { "epoch": 1.65, "grad_norm": 7.4848222732543945, "learning_rate": 9.040702190860879e-07, "loss": 0.5885, "step": 11359 }, { "epoch": 1.65, "grad_norm": 5.752196311950684, "learning_rate": 9.039126676909733e-07, "loss": 0.4693, "step": 11360 }, { "epoch": 1.65, "grad_norm": 5.870534420013428, "learning_rate": 9.037551187031708e-07, "loss": 0.4579, "step": 11361 }, { "epoch": 1.65, "grad_norm": 7.3697662353515625, "learning_rate": 9.03597572126627e-07, "loss": 0.5652, "step": 11362 }, { "epoch": 1.65, "grad_norm": 6.401963233947754, "learning_rate": 9.034400279652898e-07, "loss": 0.5014, "step": 11363 }, { "epoch": 1.65, "grad_norm": 7.02504825592041, "learning_rate": 9.032824862231054e-07, "loss": 0.5467, "step": 11364 }, { "epoch": 1.65, "grad_norm": 6.564637660980225, "learning_rate": 9.031249469040216e-07, "loss": 0.575, "step": 11365 }, { "epoch": 1.65, "grad_norm": 6.10776948928833, "learning_rate": 9.029674100119844e-07, "loss": 0.5132, "step": 11366 }, { "epoch": 1.65, "grad_norm": 7.223114013671875, "learning_rate": 9.028098755509416e-07, "loss": 0.5626, "step": 11367 }, { "epoch": 1.65, "grad_norm": 6.827312469482422, "learning_rate": 9.026523435248391e-07, "loss": 0.5989, "step": 11368 }, { "epoch": 1.65, "grad_norm": 7.092174530029297, "learning_rate": 9.024948139376241e-07, "loss": 0.5032, "step": 11369 }, { "epoch": 1.65, "grad_norm": 6.998037338256836, "learning_rate": 9.023372867932429e-07, "loss": 0.5021, "step": 11370 }, { "epoch": 1.65, "grad_norm": 6.012771129608154, "learning_rate": 9.021797620956426e-07, "loss": 0.4587, "step": 11371 }, { "epoch": 1.65, "grad_norm": 6.798038482666016, "learning_rate": 9.02022239848769e-07, "loss": 0.5797, "step": 11372 }, { "epoch": 1.65, "grad_norm": 6.98175573348999, "learning_rate": 9.018647200565694e-07, "loss": 0.6575, "step": 11373 }, { "epoch": 1.65, "grad_norm": 6.816182613372803, "learning_rate": 9.017072027229897e-07, "loss": 0.6112, "step": 11374 }, { "epoch": 1.65, "grad_norm": 8.098642349243164, "learning_rate": 9.015496878519762e-07, "loss": 0.5311, "step": 11375 }, { "epoch": 1.65, "grad_norm": 7.427706241607666, "learning_rate": 9.013921754474754e-07, "loss": 0.601, "step": 11376 }, { "epoch": 1.65, "grad_norm": 6.152124404907227, "learning_rate": 9.012346655134335e-07, "loss": 0.5025, "step": 11377 }, { "epoch": 1.65, "grad_norm": 6.232525825500488, "learning_rate": 9.010771580537962e-07, "loss": 0.5252, "step": 11378 }, { "epoch": 1.65, "grad_norm": 6.240371227264404, "learning_rate": 9.009196530725101e-07, "loss": 0.5985, "step": 11379 }, { "epoch": 1.65, "grad_norm": 6.564528465270996, "learning_rate": 9.007621505735215e-07, "loss": 0.5235, "step": 11380 }, { "epoch": 1.65, "grad_norm": 7.852238655090332, "learning_rate": 9.006046505607757e-07, "loss": 0.5213, "step": 11381 }, { "epoch": 1.65, "grad_norm": 6.467430591583252, "learning_rate": 9.004471530382189e-07, "loss": 0.578, "step": 11382 }, { "epoch": 1.65, "grad_norm": 5.959080219268799, "learning_rate": 9.002896580097967e-07, "loss": 0.5313, "step": 11383 }, { "epoch": 1.65, "grad_norm": 6.051852226257324, "learning_rate": 9.001321654794556e-07, "loss": 0.4558, "step": 11384 }, { "epoch": 1.65, "grad_norm": 5.230162143707275, "learning_rate": 8.999746754511402e-07, "loss": 0.4563, "step": 11385 }, { "epoch": 1.65, "grad_norm": 6.976851940155029, "learning_rate": 8.998171879287974e-07, "loss": 0.5518, "step": 11386 }, { "epoch": 1.65, "grad_norm": 6.845064640045166, "learning_rate": 8.996597029163718e-07, "loss": 0.4896, "step": 11387 }, { "epoch": 1.65, "grad_norm": 6.7179765701293945, "learning_rate": 8.995022204178094e-07, "loss": 0.5501, "step": 11388 }, { "epoch": 1.65, "grad_norm": 6.363683700561523, "learning_rate": 8.993447404370557e-07, "loss": 0.5548, "step": 11389 }, { "epoch": 1.65, "grad_norm": 6.887206554412842, "learning_rate": 8.99187262978056e-07, "loss": 0.5759, "step": 11390 }, { "epoch": 1.65, "grad_norm": 6.466920375823975, "learning_rate": 8.990297880447553e-07, "loss": 0.5433, "step": 11391 }, { "epoch": 1.65, "grad_norm": 6.449908256530762, "learning_rate": 8.988723156410996e-07, "loss": 0.55, "step": 11392 }, { "epoch": 1.65, "grad_norm": 7.043704986572266, "learning_rate": 8.987148457710332e-07, "loss": 0.4946, "step": 11393 }, { "epoch": 1.65, "grad_norm": 6.081231117248535, "learning_rate": 8.985573784385023e-07, "loss": 0.4971, "step": 11394 }, { "epoch": 1.65, "grad_norm": 6.49567174911499, "learning_rate": 8.983999136474512e-07, "loss": 0.5213, "step": 11395 }, { "epoch": 1.65, "grad_norm": 7.732484340667725, "learning_rate": 8.982424514018254e-07, "loss": 0.5464, "step": 11396 }, { "epoch": 1.65, "grad_norm": 6.654632091522217, "learning_rate": 8.980849917055691e-07, "loss": 0.6214, "step": 11397 }, { "epoch": 1.65, "grad_norm": 6.8838396072387695, "learning_rate": 8.979275345626283e-07, "loss": 0.5406, "step": 11398 }, { "epoch": 1.65, "grad_norm": 6.992110729217529, "learning_rate": 8.977700799769469e-07, "loss": 0.548, "step": 11399 }, { "epoch": 1.65, "grad_norm": 6.441586494445801, "learning_rate": 8.976126279524704e-07, "loss": 0.5849, "step": 11400 }, { "epoch": 1.65, "grad_norm": 6.770349025726318, "learning_rate": 8.974551784931428e-07, "loss": 0.5113, "step": 11401 }, { "epoch": 1.65, "grad_norm": 5.726381778717041, "learning_rate": 8.972977316029094e-07, "loss": 0.5005, "step": 11402 }, { "epoch": 1.65, "grad_norm": 5.832404136657715, "learning_rate": 8.971402872857143e-07, "loss": 0.5363, "step": 11403 }, { "epoch": 1.65, "grad_norm": 6.055409908294678, "learning_rate": 8.969828455455023e-07, "loss": 0.5297, "step": 11404 }, { "epoch": 1.65, "grad_norm": 6.427262306213379, "learning_rate": 8.968254063862174e-07, "loss": 0.5726, "step": 11405 }, { "epoch": 1.65, "grad_norm": 6.244687557220459, "learning_rate": 8.966679698118043e-07, "loss": 0.5171, "step": 11406 }, { "epoch": 1.66, "grad_norm": 7.947829246520996, "learning_rate": 8.965105358262076e-07, "loss": 0.6459, "step": 11407 }, { "epoch": 1.66, "grad_norm": 6.230454444885254, "learning_rate": 8.963531044333712e-07, "loss": 0.4999, "step": 11408 }, { "epoch": 1.66, "grad_norm": 6.38355827331543, "learning_rate": 8.961956756372395e-07, "loss": 0.5391, "step": 11409 }, { "epoch": 1.66, "grad_norm": 6.275200366973877, "learning_rate": 8.960382494417561e-07, "loss": 0.5799, "step": 11410 }, { "epoch": 1.66, "grad_norm": 7.667176723480225, "learning_rate": 8.958808258508659e-07, "loss": 0.5208, "step": 11411 }, { "epoch": 1.66, "grad_norm": 6.532479286193848, "learning_rate": 8.957234048685119e-07, "loss": 0.5095, "step": 11412 }, { "epoch": 1.66, "grad_norm": 6.279545783996582, "learning_rate": 8.95565986498639e-07, "loss": 0.496, "step": 11413 }, { "epoch": 1.66, "grad_norm": 6.562435626983643, "learning_rate": 8.954085707451903e-07, "loss": 0.4755, "step": 11414 }, { "epoch": 1.66, "grad_norm": 6.991790294647217, "learning_rate": 8.952511576121103e-07, "loss": 0.5301, "step": 11415 }, { "epoch": 1.66, "grad_norm": 6.712976932525635, "learning_rate": 8.95093747103342e-07, "loss": 0.6515, "step": 11416 }, { "epoch": 1.66, "grad_norm": 5.899078845977783, "learning_rate": 8.949363392228298e-07, "loss": 0.5468, "step": 11417 }, { "epoch": 1.66, "grad_norm": 6.171853065490723, "learning_rate": 8.947789339745164e-07, "loss": 0.5025, "step": 11418 }, { "epoch": 1.66, "grad_norm": 6.227407455444336, "learning_rate": 8.946215313623464e-07, "loss": 0.5312, "step": 11419 }, { "epoch": 1.66, "grad_norm": 6.847017765045166, "learning_rate": 8.944641313902622e-07, "loss": 0.6009, "step": 11420 }, { "epoch": 1.66, "grad_norm": 6.623576641082764, "learning_rate": 8.94306734062208e-07, "loss": 0.4814, "step": 11421 }, { "epoch": 1.66, "grad_norm": 6.244097709655762, "learning_rate": 8.941493393821265e-07, "loss": 0.5469, "step": 11422 }, { "epoch": 1.66, "grad_norm": 6.133369445800781, "learning_rate": 8.939919473539619e-07, "loss": 0.5044, "step": 11423 }, { "epoch": 1.66, "grad_norm": 6.710540294647217, "learning_rate": 8.938345579816565e-07, "loss": 0.5424, "step": 11424 }, { "epoch": 1.66, "grad_norm": 6.327604293823242, "learning_rate": 8.936771712691538e-07, "loss": 0.4772, "step": 11425 }, { "epoch": 1.66, "grad_norm": 6.684698581695557, "learning_rate": 8.935197872203966e-07, "loss": 0.5307, "step": 11426 }, { "epoch": 1.66, "grad_norm": 6.176840305328369, "learning_rate": 8.933624058393285e-07, "loss": 0.4927, "step": 11427 }, { "epoch": 1.66, "grad_norm": 6.302789688110352, "learning_rate": 8.932050271298917e-07, "loss": 0.5602, "step": 11428 }, { "epoch": 1.66, "grad_norm": 6.8185319900512695, "learning_rate": 8.930476510960296e-07, "loss": 0.4744, "step": 11429 }, { "epoch": 1.66, "grad_norm": 5.83183479309082, "learning_rate": 8.928902777416848e-07, "loss": 0.5412, "step": 11430 }, { "epoch": 1.66, "grad_norm": 6.054202556610107, "learning_rate": 8.927329070708001e-07, "loss": 0.4706, "step": 11431 }, { "epoch": 1.66, "grad_norm": 6.238205909729004, "learning_rate": 8.92575539087318e-07, "loss": 0.4283, "step": 11432 }, { "epoch": 1.66, "grad_norm": 6.348437309265137, "learning_rate": 8.924181737951811e-07, "loss": 0.5846, "step": 11433 }, { "epoch": 1.66, "grad_norm": 5.937490940093994, "learning_rate": 8.922608111983323e-07, "loss": 0.4928, "step": 11434 }, { "epoch": 1.66, "grad_norm": 6.2988104820251465, "learning_rate": 8.921034513007136e-07, "loss": 0.5355, "step": 11435 }, { "epoch": 1.66, "grad_norm": 5.984801292419434, "learning_rate": 8.919460941062679e-07, "loss": 0.5589, "step": 11436 }, { "epoch": 1.66, "grad_norm": 6.3018012046813965, "learning_rate": 8.91788739618937e-07, "loss": 0.5164, "step": 11437 }, { "epoch": 1.66, "grad_norm": 6.45216178894043, "learning_rate": 8.916313878426637e-07, "loss": 0.5268, "step": 11438 }, { "epoch": 1.66, "grad_norm": 6.906999588012695, "learning_rate": 8.914740387813896e-07, "loss": 0.5371, "step": 11439 }, { "epoch": 1.66, "grad_norm": 6.450026035308838, "learning_rate": 8.913166924390573e-07, "loss": 0.5284, "step": 11440 }, { "epoch": 1.66, "grad_norm": 6.430149555206299, "learning_rate": 8.911593488196085e-07, "loss": 0.4981, "step": 11441 }, { "epoch": 1.66, "grad_norm": 5.808304309844971, "learning_rate": 8.910020079269857e-07, "loss": 0.5023, "step": 11442 }, { "epoch": 1.66, "grad_norm": 7.062308311462402, "learning_rate": 8.908446697651302e-07, "loss": 0.5288, "step": 11443 }, { "epoch": 1.66, "grad_norm": 6.335904121398926, "learning_rate": 8.906873343379842e-07, "loss": 0.5639, "step": 11444 }, { "epoch": 1.66, "grad_norm": 7.9886651039123535, "learning_rate": 8.905300016494895e-07, "loss": 0.4974, "step": 11445 }, { "epoch": 1.66, "grad_norm": 6.8794379234313965, "learning_rate": 8.903726717035878e-07, "loss": 0.4956, "step": 11446 }, { "epoch": 1.66, "grad_norm": 6.701355934143066, "learning_rate": 8.902153445042203e-07, "loss": 0.5878, "step": 11447 }, { "epoch": 1.66, "grad_norm": 6.56822395324707, "learning_rate": 8.900580200553294e-07, "loss": 0.5146, "step": 11448 }, { "epoch": 1.66, "grad_norm": 7.575230121612549, "learning_rate": 8.899006983608557e-07, "loss": 0.6024, "step": 11449 }, { "epoch": 1.66, "grad_norm": 7.305602550506592, "learning_rate": 8.897433794247417e-07, "loss": 0.5211, "step": 11450 }, { "epoch": 1.66, "grad_norm": 5.919574737548828, "learning_rate": 8.895860632509278e-07, "loss": 0.517, "step": 11451 }, { "epoch": 1.66, "grad_norm": 7.672133445739746, "learning_rate": 8.894287498433559e-07, "loss": 0.5842, "step": 11452 }, { "epoch": 1.66, "grad_norm": 5.8759050369262695, "learning_rate": 8.892714392059667e-07, "loss": 0.5155, "step": 11453 }, { "epoch": 1.66, "grad_norm": 6.317338466644287, "learning_rate": 8.89114131342702e-07, "loss": 0.4869, "step": 11454 }, { "epoch": 1.66, "grad_norm": 7.308032512664795, "learning_rate": 8.889568262575022e-07, "loss": 0.5806, "step": 11455 }, { "epoch": 1.66, "grad_norm": 6.153112411499023, "learning_rate": 8.887995239543092e-07, "loss": 0.529, "step": 11456 }, { "epoch": 1.66, "grad_norm": 6.751037120819092, "learning_rate": 8.886422244370629e-07, "loss": 0.5411, "step": 11457 }, { "epoch": 1.66, "grad_norm": 7.586342811584473, "learning_rate": 8.884849277097049e-07, "loss": 0.5533, "step": 11458 }, { "epoch": 1.66, "grad_norm": 6.169722080230713, "learning_rate": 8.883276337761761e-07, "loss": 0.5246, "step": 11459 }, { "epoch": 1.66, "grad_norm": 6.701028823852539, "learning_rate": 8.881703426404165e-07, "loss": 0.5566, "step": 11460 }, { "epoch": 1.66, "grad_norm": 5.388322353363037, "learning_rate": 8.880130543063678e-07, "loss": 0.4805, "step": 11461 }, { "epoch": 1.66, "grad_norm": 6.995598316192627, "learning_rate": 8.878557687779697e-07, "loss": 0.5584, "step": 11462 }, { "epoch": 1.66, "grad_norm": 6.8246588706970215, "learning_rate": 8.876984860591635e-07, "loss": 0.5631, "step": 11463 }, { "epoch": 1.66, "grad_norm": 7.389024257659912, "learning_rate": 8.875412061538888e-07, "loss": 0.5068, "step": 11464 }, { "epoch": 1.66, "grad_norm": 6.8309407234191895, "learning_rate": 8.873839290660869e-07, "loss": 0.5894, "step": 11465 }, { "epoch": 1.66, "grad_norm": 6.503073215484619, "learning_rate": 8.872266547996971e-07, "loss": 0.5288, "step": 11466 }, { "epoch": 1.66, "grad_norm": 7.17031192779541, "learning_rate": 8.870693833586609e-07, "loss": 0.5191, "step": 11467 }, { "epoch": 1.66, "grad_norm": 6.600474834442139, "learning_rate": 8.869121147469172e-07, "loss": 0.5115, "step": 11468 }, { "epoch": 1.66, "grad_norm": 6.472403526306152, "learning_rate": 8.867548489684071e-07, "loss": 0.4705, "step": 11469 }, { "epoch": 1.66, "grad_norm": 6.42606782913208, "learning_rate": 8.865975860270699e-07, "loss": 0.5248, "step": 11470 }, { "epoch": 1.66, "grad_norm": 6.180947303771973, "learning_rate": 8.864403259268464e-07, "loss": 0.5387, "step": 11471 }, { "epoch": 1.66, "grad_norm": 6.975754261016846, "learning_rate": 8.862830686716758e-07, "loss": 0.5789, "step": 11472 }, { "epoch": 1.66, "grad_norm": 6.333518028259277, "learning_rate": 8.861258142654983e-07, "loss": 0.5336, "step": 11473 }, { "epoch": 1.66, "grad_norm": 6.562036991119385, "learning_rate": 8.85968562712253e-07, "loss": 0.5832, "step": 11474 }, { "epoch": 1.66, "grad_norm": 7.333395957946777, "learning_rate": 8.858113140158806e-07, "loss": 0.5476, "step": 11475 }, { "epoch": 1.67, "grad_norm": 7.039994716644287, "learning_rate": 8.856540681803197e-07, "loss": 0.5524, "step": 11476 }, { "epoch": 1.67, "grad_norm": 6.2106614112854, "learning_rate": 8.854968252095109e-07, "loss": 0.521, "step": 11477 }, { "epoch": 1.67, "grad_norm": 6.7664794921875, "learning_rate": 8.853395851073928e-07, "loss": 0.5678, "step": 11478 }, { "epoch": 1.67, "grad_norm": 6.419461727142334, "learning_rate": 8.85182347877905e-07, "loss": 0.5306, "step": 11479 }, { "epoch": 1.67, "grad_norm": 7.021646976470947, "learning_rate": 8.850251135249871e-07, "loss": 0.4699, "step": 11480 }, { "epoch": 1.67, "grad_norm": 6.189949035644531, "learning_rate": 8.848678820525782e-07, "loss": 0.501, "step": 11481 }, { "epoch": 1.67, "grad_norm": 7.102671146392822, "learning_rate": 8.847106534646171e-07, "loss": 0.6434, "step": 11482 }, { "epoch": 1.67, "grad_norm": 7.277371883392334, "learning_rate": 8.845534277650438e-07, "loss": 0.5521, "step": 11483 }, { "epoch": 1.67, "grad_norm": 5.86121129989624, "learning_rate": 8.843962049577961e-07, "loss": 0.5037, "step": 11484 }, { "epoch": 1.67, "grad_norm": 6.437141418457031, "learning_rate": 8.84238985046814e-07, "loss": 0.6251, "step": 11485 }, { "epoch": 1.67, "grad_norm": 5.545197010040283, "learning_rate": 8.840817680360362e-07, "loss": 0.4811, "step": 11486 }, { "epoch": 1.67, "grad_norm": 6.22171688079834, "learning_rate": 8.83924553929401e-07, "loss": 0.4588, "step": 11487 }, { "epoch": 1.67, "grad_norm": 6.853153705596924, "learning_rate": 8.83767342730848e-07, "loss": 0.5202, "step": 11488 }, { "epoch": 1.67, "grad_norm": 5.984556198120117, "learning_rate": 8.836101344443148e-07, "loss": 0.4742, "step": 11489 }, { "epoch": 1.67, "grad_norm": 6.861990451812744, "learning_rate": 8.83452929073741e-07, "loss": 0.5182, "step": 11490 }, { "epoch": 1.67, "grad_norm": 6.3427910804748535, "learning_rate": 8.832957266230642e-07, "loss": 0.5168, "step": 11491 }, { "epoch": 1.67, "grad_norm": 7.339893817901611, "learning_rate": 8.83138527096224e-07, "loss": 0.4968, "step": 11492 }, { "epoch": 1.67, "grad_norm": 6.816551685333252, "learning_rate": 8.829813304971578e-07, "loss": 0.5233, "step": 11493 }, { "epoch": 1.67, "grad_norm": 5.673855304718018, "learning_rate": 8.828241368298044e-07, "loss": 0.5156, "step": 11494 }, { "epoch": 1.67, "grad_norm": 6.652675628662109, "learning_rate": 8.826669460981014e-07, "loss": 0.5176, "step": 11495 }, { "epoch": 1.67, "grad_norm": 7.300743103027344, "learning_rate": 8.82509758305988e-07, "loss": 0.5893, "step": 11496 }, { "epoch": 1.67, "grad_norm": 6.671887397766113, "learning_rate": 8.823525734574011e-07, "loss": 0.6007, "step": 11497 }, { "epoch": 1.67, "grad_norm": 5.580245494842529, "learning_rate": 8.821953915562799e-07, "loss": 0.4766, "step": 11498 }, { "epoch": 1.67, "grad_norm": 5.973719120025635, "learning_rate": 8.820382126065615e-07, "loss": 0.4894, "step": 11499 }, { "epoch": 1.67, "grad_norm": 6.347026824951172, "learning_rate": 8.81881036612184e-07, "loss": 0.5692, "step": 11500 }, { "epoch": 1.67, "grad_norm": 6.355182647705078, "learning_rate": 8.817238635770852e-07, "loss": 0.4522, "step": 11501 }, { "epoch": 1.67, "grad_norm": 5.935766220092773, "learning_rate": 8.81566693505203e-07, "loss": 0.5246, "step": 11502 }, { "epoch": 1.67, "grad_norm": 6.947428226470947, "learning_rate": 8.814095264004744e-07, "loss": 0.5656, "step": 11503 }, { "epoch": 1.67, "grad_norm": 6.423730373382568, "learning_rate": 8.812523622668378e-07, "loss": 0.5633, "step": 11504 }, { "epoch": 1.67, "grad_norm": 6.990110874176025, "learning_rate": 8.810952011082301e-07, "loss": 0.5434, "step": 11505 }, { "epoch": 1.67, "grad_norm": 6.59893274307251, "learning_rate": 8.809380429285893e-07, "loss": 0.5534, "step": 11506 }, { "epoch": 1.67, "grad_norm": 6.679605484008789, "learning_rate": 8.807808877318522e-07, "loss": 0.4996, "step": 11507 }, { "epoch": 1.67, "grad_norm": 7.135215759277344, "learning_rate": 8.806237355219562e-07, "loss": 0.5801, "step": 11508 }, { "epoch": 1.67, "grad_norm": 6.594480514526367, "learning_rate": 8.804665863028383e-07, "loss": 0.5101, "step": 11509 }, { "epoch": 1.67, "grad_norm": 6.618789196014404, "learning_rate": 8.803094400784359e-07, "loss": 0.597, "step": 11510 }, { "epoch": 1.67, "grad_norm": 6.813790321350098, "learning_rate": 8.801522968526865e-07, "loss": 0.5839, "step": 11511 }, { "epoch": 1.67, "grad_norm": 6.942200183868408, "learning_rate": 8.799951566295262e-07, "loss": 0.4483, "step": 11512 }, { "epoch": 1.67, "grad_norm": 6.352244853973389, "learning_rate": 8.798380194128922e-07, "loss": 0.5515, "step": 11513 }, { "epoch": 1.67, "grad_norm": 7.13092041015625, "learning_rate": 8.796808852067216e-07, "loss": 0.506, "step": 11514 }, { "epoch": 1.67, "grad_norm": 5.9263386726379395, "learning_rate": 8.79523754014951e-07, "loss": 0.4862, "step": 11515 }, { "epoch": 1.67, "grad_norm": 6.20629358291626, "learning_rate": 8.793666258415166e-07, "loss": 0.5659, "step": 11516 }, { "epoch": 1.67, "grad_norm": 6.237661838531494, "learning_rate": 8.792095006903557e-07, "loss": 0.405, "step": 11517 }, { "epoch": 1.67, "grad_norm": 6.798319339752197, "learning_rate": 8.790523785654043e-07, "loss": 0.5399, "step": 11518 }, { "epoch": 1.67, "grad_norm": 6.343034744262695, "learning_rate": 8.788952594705992e-07, "loss": 0.5659, "step": 11519 }, { "epoch": 1.67, "grad_norm": 7.266232013702393, "learning_rate": 8.787381434098766e-07, "loss": 0.626, "step": 11520 }, { "epoch": 1.67, "grad_norm": 6.557413578033447, "learning_rate": 8.785810303871729e-07, "loss": 0.5287, "step": 11521 }, { "epoch": 1.67, "grad_norm": 6.112056732177734, "learning_rate": 8.784239204064238e-07, "loss": 0.5528, "step": 11522 }, { "epoch": 1.67, "grad_norm": 5.944216251373291, "learning_rate": 8.782668134715662e-07, "loss": 0.5428, "step": 11523 }, { "epoch": 1.67, "grad_norm": 5.221056938171387, "learning_rate": 8.781097095865355e-07, "loss": 0.4787, "step": 11524 }, { "epoch": 1.67, "grad_norm": 5.887960433959961, "learning_rate": 8.779526087552682e-07, "loss": 0.5425, "step": 11525 }, { "epoch": 1.67, "grad_norm": 6.9871296882629395, "learning_rate": 8.777955109816996e-07, "loss": 0.6475, "step": 11526 }, { "epoch": 1.67, "grad_norm": 6.558109283447266, "learning_rate": 8.776384162697665e-07, "loss": 0.5211, "step": 11527 }, { "epoch": 1.67, "grad_norm": 7.226150035858154, "learning_rate": 8.774813246234037e-07, "loss": 0.5375, "step": 11528 }, { "epoch": 1.67, "grad_norm": 6.790623664855957, "learning_rate": 8.773242360465476e-07, "loss": 0.5663, "step": 11529 }, { "epoch": 1.67, "grad_norm": 6.108089447021484, "learning_rate": 8.77167150543133e-07, "loss": 0.5429, "step": 11530 }, { "epoch": 1.67, "grad_norm": 7.9481425285339355, "learning_rate": 8.770100681170961e-07, "loss": 0.7138, "step": 11531 }, { "epoch": 1.67, "grad_norm": 6.283271312713623, "learning_rate": 8.768529887723718e-07, "loss": 0.4893, "step": 11532 }, { "epoch": 1.67, "grad_norm": 7.122101783752441, "learning_rate": 8.766959125128962e-07, "loss": 0.5312, "step": 11533 }, { "epoch": 1.67, "grad_norm": 6.127269744873047, "learning_rate": 8.765388393426039e-07, "loss": 0.5612, "step": 11534 }, { "epoch": 1.67, "grad_norm": 6.268712997436523, "learning_rate": 8.763817692654305e-07, "loss": 0.5308, "step": 11535 }, { "epoch": 1.67, "grad_norm": 6.440286636352539, "learning_rate": 8.762247022853109e-07, "loss": 0.5023, "step": 11536 }, { "epoch": 1.67, "grad_norm": 7.035902976989746, "learning_rate": 8.7606763840618e-07, "loss": 0.6001, "step": 11537 }, { "epoch": 1.67, "grad_norm": 7.267312526702881, "learning_rate": 8.759105776319736e-07, "loss": 0.5199, "step": 11538 }, { "epoch": 1.67, "grad_norm": 6.018749237060547, "learning_rate": 8.757535199666256e-07, "loss": 0.4498, "step": 11539 }, { "epoch": 1.67, "grad_norm": 6.3527045249938965, "learning_rate": 8.755964654140718e-07, "loss": 0.5383, "step": 11540 }, { "epoch": 1.67, "grad_norm": 6.405246257781982, "learning_rate": 8.75439413978246e-07, "loss": 0.598, "step": 11541 }, { "epoch": 1.67, "grad_norm": 5.8438849449157715, "learning_rate": 8.752823656630836e-07, "loss": 0.4709, "step": 11542 }, { "epoch": 1.67, "grad_norm": 7.468002796173096, "learning_rate": 8.751253204725185e-07, "loss": 0.5107, "step": 11543 }, { "epoch": 1.67, "grad_norm": 6.372033596038818, "learning_rate": 8.74968278410486e-07, "loss": 0.5045, "step": 11544 }, { "epoch": 1.68, "grad_norm": 6.506215572357178, "learning_rate": 8.748112394809198e-07, "loss": 0.4881, "step": 11545 }, { "epoch": 1.68, "grad_norm": 6.0914716720581055, "learning_rate": 8.746542036877549e-07, "loss": 0.5065, "step": 11546 }, { "epoch": 1.68, "grad_norm": 6.233615398406982, "learning_rate": 8.744971710349249e-07, "loss": 0.5935, "step": 11547 }, { "epoch": 1.68, "grad_norm": 6.046459674835205, "learning_rate": 8.743401415263646e-07, "loss": 0.4918, "step": 11548 }, { "epoch": 1.68, "grad_norm": 7.0137248039245605, "learning_rate": 8.741831151660077e-07, "loss": 0.4886, "step": 11549 }, { "epoch": 1.68, "grad_norm": 6.9634013175964355, "learning_rate": 8.740260919577889e-07, "loss": 0.5805, "step": 11550 }, { "epoch": 1.68, "grad_norm": 6.654594898223877, "learning_rate": 8.73869071905641e-07, "loss": 0.5632, "step": 11551 }, { "epoch": 1.68, "grad_norm": 7.090626239776611, "learning_rate": 8.737120550134991e-07, "loss": 0.5484, "step": 11552 }, { "epoch": 1.68, "grad_norm": 5.769149303436279, "learning_rate": 8.735550412852959e-07, "loss": 0.5051, "step": 11553 }, { "epoch": 1.68, "grad_norm": 6.797759056091309, "learning_rate": 8.733980307249662e-07, "loss": 0.5651, "step": 11554 }, { "epoch": 1.68, "grad_norm": 6.250176429748535, "learning_rate": 8.73241023336443e-07, "loss": 0.5134, "step": 11555 }, { "epoch": 1.68, "grad_norm": 6.49561882019043, "learning_rate": 8.730840191236601e-07, "loss": 0.5129, "step": 11556 }, { "epoch": 1.68, "grad_norm": 6.631711959838867, "learning_rate": 8.729270180905506e-07, "loss": 0.5309, "step": 11557 }, { "epoch": 1.68, "grad_norm": 6.679652214050293, "learning_rate": 8.727700202410485e-07, "loss": 0.509, "step": 11558 }, { "epoch": 1.68, "grad_norm": 7.512617588043213, "learning_rate": 8.726130255790864e-07, "loss": 0.56, "step": 11559 }, { "epoch": 1.68, "grad_norm": 6.469741344451904, "learning_rate": 8.724560341085985e-07, "loss": 0.5143, "step": 11560 }, { "epoch": 1.68, "grad_norm": 6.08613920211792, "learning_rate": 8.722990458335169e-07, "loss": 0.5475, "step": 11561 }, { "epoch": 1.68, "grad_norm": 6.69771146774292, "learning_rate": 8.721420607577755e-07, "loss": 0.5448, "step": 11562 }, { "epoch": 1.68, "grad_norm": 7.126326084136963, "learning_rate": 8.71985078885307e-07, "loss": 0.4793, "step": 11563 }, { "epoch": 1.68, "grad_norm": 6.851492404937744, "learning_rate": 8.718281002200441e-07, "loss": 0.5867, "step": 11564 }, { "epoch": 1.68, "grad_norm": 6.675488471984863, "learning_rate": 8.716711247659204e-07, "loss": 0.5406, "step": 11565 }, { "epoch": 1.68, "grad_norm": 6.221374988555908, "learning_rate": 8.715141525268675e-07, "loss": 0.5222, "step": 11566 }, { "epoch": 1.68, "grad_norm": 5.758565902709961, "learning_rate": 8.713571835068195e-07, "loss": 0.4519, "step": 11567 }, { "epoch": 1.68, "grad_norm": 6.26609992980957, "learning_rate": 8.712002177097078e-07, "loss": 0.461, "step": 11568 }, { "epoch": 1.68, "grad_norm": 6.114731788635254, "learning_rate": 8.710432551394656e-07, "loss": 0.4729, "step": 11569 }, { "epoch": 1.68, "grad_norm": 6.180004119873047, "learning_rate": 8.708862958000246e-07, "loss": 0.5511, "step": 11570 }, { "epoch": 1.68, "grad_norm": 6.299370288848877, "learning_rate": 8.707293396953181e-07, "loss": 0.4589, "step": 11571 }, { "epoch": 1.68, "grad_norm": 6.955484390258789, "learning_rate": 8.705723868292777e-07, "loss": 0.5663, "step": 11572 }, { "epoch": 1.68, "grad_norm": 6.031138896942139, "learning_rate": 8.704154372058363e-07, "loss": 0.545, "step": 11573 }, { "epoch": 1.68, "grad_norm": 7.016809463500977, "learning_rate": 8.70258490828925e-07, "loss": 0.5642, "step": 11574 }, { "epoch": 1.68, "grad_norm": 6.6760969161987305, "learning_rate": 8.701015477024768e-07, "loss": 0.5626, "step": 11575 }, { "epoch": 1.68, "grad_norm": 6.429482460021973, "learning_rate": 8.699446078304231e-07, "loss": 0.5486, "step": 11576 }, { "epoch": 1.68, "grad_norm": 6.319712162017822, "learning_rate": 8.697876712166962e-07, "loss": 0.5862, "step": 11577 }, { "epoch": 1.68, "grad_norm": 6.311394691467285, "learning_rate": 8.696307378652271e-07, "loss": 0.5194, "step": 11578 }, { "epoch": 1.68, "grad_norm": 5.983430862426758, "learning_rate": 8.694738077799486e-07, "loss": 0.5106, "step": 11579 }, { "epoch": 1.68, "grad_norm": 6.706974029541016, "learning_rate": 8.693168809647913e-07, "loss": 0.5777, "step": 11580 }, { "epoch": 1.68, "grad_norm": 7.5315046310424805, "learning_rate": 8.691599574236876e-07, "loss": 0.5343, "step": 11581 }, { "epoch": 1.68, "grad_norm": 8.576154708862305, "learning_rate": 8.690030371605681e-07, "loss": 0.5583, "step": 11582 }, { "epoch": 1.68, "grad_norm": 7.254024982452393, "learning_rate": 8.688461201793651e-07, "loss": 0.639, "step": 11583 }, { "epoch": 1.68, "grad_norm": 6.338915824890137, "learning_rate": 8.686892064840092e-07, "loss": 0.5219, "step": 11584 }, { "epoch": 1.68, "grad_norm": 6.4878153800964355, "learning_rate": 8.685322960784323e-07, "loss": 0.5208, "step": 11585 }, { "epoch": 1.68, "grad_norm": 6.768133163452148, "learning_rate": 8.683753889665644e-07, "loss": 0.6023, "step": 11586 }, { "epoch": 1.68, "grad_norm": 6.432076454162598, "learning_rate": 8.682184851523379e-07, "loss": 0.5242, "step": 11587 }, { "epoch": 1.68, "grad_norm": 6.779044151306152, "learning_rate": 8.680615846396827e-07, "loss": 0.5754, "step": 11588 }, { "epoch": 1.68, "grad_norm": 7.001156806945801, "learning_rate": 8.679046874325302e-07, "loss": 0.523, "step": 11589 }, { "epoch": 1.68, "grad_norm": 7.320629119873047, "learning_rate": 8.677477935348114e-07, "loss": 0.6812, "step": 11590 }, { "epoch": 1.68, "grad_norm": 6.441895484924316, "learning_rate": 8.675909029504563e-07, "loss": 0.5025, "step": 11591 }, { "epoch": 1.68, "grad_norm": 7.021982192993164, "learning_rate": 8.674340156833964e-07, "loss": 0.6346, "step": 11592 }, { "epoch": 1.68, "grad_norm": 5.942638397216797, "learning_rate": 8.672771317375614e-07, "loss": 0.5251, "step": 11593 }, { "epoch": 1.68, "grad_norm": 7.668302059173584, "learning_rate": 8.671202511168828e-07, "loss": 0.6091, "step": 11594 }, { "epoch": 1.68, "grad_norm": 6.401913166046143, "learning_rate": 8.669633738252899e-07, "loss": 0.5369, "step": 11595 }, { "epoch": 1.68, "grad_norm": 7.285916328430176, "learning_rate": 8.668064998667138e-07, "loss": 0.5409, "step": 11596 }, { "epoch": 1.68, "grad_norm": 6.511529922485352, "learning_rate": 8.666496292450844e-07, "loss": 0.595, "step": 11597 }, { "epoch": 1.68, "grad_norm": 7.023717403411865, "learning_rate": 8.664927619643319e-07, "loss": 0.5397, "step": 11598 }, { "epoch": 1.68, "grad_norm": 6.86083459854126, "learning_rate": 8.66335898028386e-07, "loss": 0.5908, "step": 11599 }, { "epoch": 1.68, "grad_norm": 5.642023086547852, "learning_rate": 8.661790374411774e-07, "loss": 0.4962, "step": 11600 }, { "epoch": 1.68, "grad_norm": 7.897003650665283, "learning_rate": 8.660221802066352e-07, "loss": 0.6413, "step": 11601 }, { "epoch": 1.68, "grad_norm": 6.712657928466797, "learning_rate": 8.658653263286899e-07, "loss": 0.5764, "step": 11602 }, { "epoch": 1.68, "grad_norm": 6.913859844207764, "learning_rate": 8.657084758112709e-07, "loss": 0.5478, "step": 11603 }, { "epoch": 1.68, "grad_norm": 6.518693447113037, "learning_rate": 8.655516286583075e-07, "loss": 0.5467, "step": 11604 }, { "epoch": 1.68, "grad_norm": 6.61881685256958, "learning_rate": 8.653947848737299e-07, "loss": 0.4894, "step": 11605 }, { "epoch": 1.68, "grad_norm": 6.400875568389893, "learning_rate": 8.652379444614674e-07, "loss": 0.5156, "step": 11606 }, { "epoch": 1.68, "grad_norm": 7.089518070220947, "learning_rate": 8.650811074254487e-07, "loss": 0.4661, "step": 11607 }, { "epoch": 1.68, "grad_norm": 6.501522064208984, "learning_rate": 8.649242737696041e-07, "loss": 0.5305, "step": 11608 }, { "epoch": 1.68, "grad_norm": 6.310154438018799, "learning_rate": 8.647674434978621e-07, "loss": 0.4974, "step": 11609 }, { "epoch": 1.68, "grad_norm": 6.187011241912842, "learning_rate": 8.646106166141523e-07, "loss": 0.5018, "step": 11610 }, { "epoch": 1.68, "grad_norm": 5.850053787231445, "learning_rate": 8.644537931224032e-07, "loss": 0.4342, "step": 11611 }, { "epoch": 1.68, "grad_norm": 6.683443546295166, "learning_rate": 8.642969730265445e-07, "loss": 0.5575, "step": 11612 }, { "epoch": 1.68, "grad_norm": 6.153594970703125, "learning_rate": 8.641401563305042e-07, "loss": 0.4954, "step": 11613 }, { "epoch": 1.69, "grad_norm": 6.519105911254883, "learning_rate": 8.639833430382114e-07, "loss": 0.5208, "step": 11614 }, { "epoch": 1.69, "grad_norm": 7.206883907318115, "learning_rate": 8.638265331535954e-07, "loss": 0.5797, "step": 11615 }, { "epoch": 1.69, "grad_norm": 6.300992965698242, "learning_rate": 8.636697266805843e-07, "loss": 0.5116, "step": 11616 }, { "epoch": 1.69, "grad_norm": 7.235518455505371, "learning_rate": 8.635129236231066e-07, "loss": 0.6448, "step": 11617 }, { "epoch": 1.69, "grad_norm": 6.04736328125, "learning_rate": 8.633561239850907e-07, "loss": 0.4592, "step": 11618 }, { "epoch": 1.69, "grad_norm": 6.3203511238098145, "learning_rate": 8.631993277704655e-07, "loss": 0.5605, "step": 11619 }, { "epoch": 1.69, "grad_norm": 5.821014404296875, "learning_rate": 8.630425349831583e-07, "loss": 0.5089, "step": 11620 }, { "epoch": 1.69, "grad_norm": 7.351871013641357, "learning_rate": 8.628857456270984e-07, "loss": 0.5451, "step": 11621 }, { "epoch": 1.69, "grad_norm": 5.963846206665039, "learning_rate": 8.627289597062127e-07, "loss": 0.5131, "step": 11622 }, { "epoch": 1.69, "grad_norm": 6.821381092071533, "learning_rate": 8.625721772244306e-07, "loss": 0.5333, "step": 11623 }, { "epoch": 1.69, "grad_norm": 7.211760520935059, "learning_rate": 8.624153981856789e-07, "loss": 0.5935, "step": 11624 }, { "epoch": 1.69, "grad_norm": 7.270487308502197, "learning_rate": 8.622586225938862e-07, "loss": 0.5762, "step": 11625 }, { "epoch": 1.69, "grad_norm": 6.366560935974121, "learning_rate": 8.621018504529793e-07, "loss": 0.5707, "step": 11626 }, { "epoch": 1.69, "grad_norm": 6.054601192474365, "learning_rate": 8.619450817668869e-07, "loss": 0.5208, "step": 11627 }, { "epoch": 1.69, "grad_norm": 6.657559871673584, "learning_rate": 8.617883165395359e-07, "loss": 0.5117, "step": 11628 }, { "epoch": 1.69, "grad_norm": 7.282499313354492, "learning_rate": 8.616315547748544e-07, "loss": 0.5768, "step": 11629 }, { "epoch": 1.69, "grad_norm": 7.141281604766846, "learning_rate": 8.61474796476769e-07, "loss": 0.5791, "step": 11630 }, { "epoch": 1.69, "grad_norm": 6.239952087402344, "learning_rate": 8.61318041649208e-07, "loss": 0.5626, "step": 11631 }, { "epoch": 1.69, "grad_norm": 7.143476963043213, "learning_rate": 8.611612902960978e-07, "loss": 0.5532, "step": 11632 }, { "epoch": 1.69, "grad_norm": 6.497376918792725, "learning_rate": 8.610045424213663e-07, "loss": 0.5809, "step": 11633 }, { "epoch": 1.69, "grad_norm": 6.748899459838867, "learning_rate": 8.608477980289396e-07, "loss": 0.5565, "step": 11634 }, { "epoch": 1.69, "grad_norm": 6.582120418548584, "learning_rate": 8.606910571227457e-07, "loss": 0.5693, "step": 11635 }, { "epoch": 1.69, "grad_norm": 6.525570869445801, "learning_rate": 8.605343197067104e-07, "loss": 0.5412, "step": 11636 }, { "epoch": 1.69, "grad_norm": 6.5229268074035645, "learning_rate": 8.603775857847617e-07, "loss": 0.4772, "step": 11637 }, { "epoch": 1.69, "grad_norm": 5.925321102142334, "learning_rate": 8.602208553608255e-07, "loss": 0.5179, "step": 11638 }, { "epoch": 1.69, "grad_norm": 7.111544609069824, "learning_rate": 8.600641284388287e-07, "loss": 0.4728, "step": 11639 }, { "epoch": 1.69, "grad_norm": 6.488502502441406, "learning_rate": 8.599074050226976e-07, "loss": 0.5917, "step": 11640 }, { "epoch": 1.69, "grad_norm": 6.486455917358398, "learning_rate": 8.597506851163589e-07, "loss": 0.5806, "step": 11641 }, { "epoch": 1.69, "grad_norm": 6.255457401275635, "learning_rate": 8.595939687237392e-07, "loss": 0.5873, "step": 11642 }, { "epoch": 1.69, "grad_norm": 6.553124904632568, "learning_rate": 8.594372558487639e-07, "loss": 0.5425, "step": 11643 }, { "epoch": 1.69, "grad_norm": 6.419970512390137, "learning_rate": 8.592805464953605e-07, "loss": 0.5548, "step": 11644 }, { "epoch": 1.69, "grad_norm": 6.458033084869385, "learning_rate": 8.59123840667454e-07, "loss": 0.5287, "step": 11645 }, { "epoch": 1.69, "grad_norm": 6.056144714355469, "learning_rate": 8.589671383689708e-07, "loss": 0.5508, "step": 11646 }, { "epoch": 1.69, "grad_norm": 5.906633377075195, "learning_rate": 8.588104396038366e-07, "loss": 0.5379, "step": 11647 }, { "epoch": 1.69, "grad_norm": 6.727090835571289, "learning_rate": 8.586537443759779e-07, "loss": 0.5523, "step": 11648 }, { "epoch": 1.69, "grad_norm": 7.594958305358887, "learning_rate": 8.584970526893192e-07, "loss": 0.5302, "step": 11649 }, { "epoch": 1.69, "grad_norm": 6.439207553863525, "learning_rate": 8.583403645477877e-07, "loss": 0.5415, "step": 11650 }, { "epoch": 1.69, "grad_norm": 7.403190612792969, "learning_rate": 8.581836799553076e-07, "loss": 0.565, "step": 11651 }, { "epoch": 1.69, "grad_norm": 7.563952922821045, "learning_rate": 8.580269989158054e-07, "loss": 0.5581, "step": 11652 }, { "epoch": 1.69, "grad_norm": 7.092591762542725, "learning_rate": 8.578703214332059e-07, "loss": 0.6097, "step": 11653 }, { "epoch": 1.69, "grad_norm": 6.293308258056641, "learning_rate": 8.577136475114348e-07, "loss": 0.5157, "step": 11654 }, { "epoch": 1.69, "grad_norm": 6.639946460723877, "learning_rate": 8.575569771544167e-07, "loss": 0.4908, "step": 11655 }, { "epoch": 1.69, "grad_norm": 5.836404323577881, "learning_rate": 8.574003103660774e-07, "loss": 0.5346, "step": 11656 }, { "epoch": 1.69, "grad_norm": 6.21488618850708, "learning_rate": 8.572436471503411e-07, "loss": 0.4582, "step": 11657 }, { "epoch": 1.69, "grad_norm": 5.9400248527526855, "learning_rate": 8.570869875111339e-07, "loss": 0.525, "step": 11658 }, { "epoch": 1.69, "grad_norm": 6.1565093994140625, "learning_rate": 8.569303314523797e-07, "loss": 0.5207, "step": 11659 }, { "epoch": 1.69, "grad_norm": 6.645319938659668, "learning_rate": 8.567736789780035e-07, "loss": 0.5007, "step": 11660 }, { "epoch": 1.69, "grad_norm": 7.398364067077637, "learning_rate": 8.566170300919303e-07, "loss": 0.5561, "step": 11661 }, { "epoch": 1.69, "grad_norm": 5.948824405670166, "learning_rate": 8.564603847980844e-07, "loss": 0.5083, "step": 11662 }, { "epoch": 1.69, "grad_norm": 6.657106399536133, "learning_rate": 8.563037431003901e-07, "loss": 0.5932, "step": 11663 }, { "epoch": 1.69, "grad_norm": 6.685351848602295, "learning_rate": 8.561471050027725e-07, "loss": 0.5456, "step": 11664 }, { "epoch": 1.69, "grad_norm": 6.733138084411621, "learning_rate": 8.559904705091548e-07, "loss": 0.5593, "step": 11665 }, { "epoch": 1.69, "grad_norm": 6.357392311096191, "learning_rate": 8.558338396234624e-07, "loss": 0.5305, "step": 11666 }, { "epoch": 1.69, "grad_norm": 5.9631242752075195, "learning_rate": 8.556772123496186e-07, "loss": 0.4726, "step": 11667 }, { "epoch": 1.69, "grad_norm": 6.818754196166992, "learning_rate": 8.555205886915477e-07, "loss": 0.5719, "step": 11668 }, { "epoch": 1.69, "grad_norm": 5.97669792175293, "learning_rate": 8.55363968653174e-07, "loss": 0.5185, "step": 11669 }, { "epoch": 1.69, "grad_norm": 6.2535247802734375, "learning_rate": 8.552073522384207e-07, "loss": 0.5161, "step": 11670 }, { "epoch": 1.69, "grad_norm": 6.552521228790283, "learning_rate": 8.550507394512123e-07, "loss": 0.5984, "step": 11671 }, { "epoch": 1.69, "grad_norm": 6.805569648742676, "learning_rate": 8.548941302954719e-07, "loss": 0.5764, "step": 11672 }, { "epoch": 1.69, "grad_norm": 7.137695789337158, "learning_rate": 8.547375247751234e-07, "loss": 0.5484, "step": 11673 }, { "epoch": 1.69, "grad_norm": 7.394567966461182, "learning_rate": 8.5458092289409e-07, "loss": 0.6287, "step": 11674 }, { "epoch": 1.69, "grad_norm": 7.380173206329346, "learning_rate": 8.544243246562957e-07, "loss": 0.6198, "step": 11675 }, { "epoch": 1.69, "grad_norm": 7.463286399841309, "learning_rate": 8.542677300656629e-07, "loss": 0.5709, "step": 11676 }, { "epoch": 1.69, "grad_norm": 6.546382427215576, "learning_rate": 8.541111391261158e-07, "loss": 0.5236, "step": 11677 }, { "epoch": 1.69, "grad_norm": 7.487311840057373, "learning_rate": 8.539545518415765e-07, "loss": 0.605, "step": 11678 }, { "epoch": 1.69, "grad_norm": 8.28836727142334, "learning_rate": 8.537979682159691e-07, "loss": 0.5216, "step": 11679 }, { "epoch": 1.69, "grad_norm": 5.965272426605225, "learning_rate": 8.536413882532158e-07, "loss": 0.526, "step": 11680 }, { "epoch": 1.69, "grad_norm": 6.416962623596191, "learning_rate": 8.534848119572401e-07, "loss": 0.487, "step": 11681 }, { "epoch": 1.7, "grad_norm": 5.990859508514404, "learning_rate": 8.533282393319637e-07, "loss": 0.5416, "step": 11682 }, { "epoch": 1.7, "grad_norm": 6.394798755645752, "learning_rate": 8.531716703813106e-07, "loss": 0.5959, "step": 11683 }, { "epoch": 1.7, "grad_norm": 6.60158634185791, "learning_rate": 8.530151051092021e-07, "loss": 0.5019, "step": 11684 }, { "epoch": 1.7, "grad_norm": 6.872127056121826, "learning_rate": 8.528585435195618e-07, "loss": 0.547, "step": 11685 }, { "epoch": 1.7, "grad_norm": 6.318319320678711, "learning_rate": 8.527019856163113e-07, "loss": 0.5398, "step": 11686 }, { "epoch": 1.7, "grad_norm": 7.057226657867432, "learning_rate": 8.525454314033736e-07, "loss": 0.565, "step": 11687 }, { "epoch": 1.7, "grad_norm": 7.038712978363037, "learning_rate": 8.523888808846703e-07, "loss": 0.5994, "step": 11688 }, { "epoch": 1.7, "grad_norm": 6.5681962966918945, "learning_rate": 8.522323340641239e-07, "loss": 0.4815, "step": 11689 }, { "epoch": 1.7, "grad_norm": 6.8067145347595215, "learning_rate": 8.52075790945656e-07, "loss": 0.489, "step": 11690 }, { "epoch": 1.7, "grad_norm": 7.833673477172852, "learning_rate": 8.519192515331891e-07, "loss": 0.5158, "step": 11691 }, { "epoch": 1.7, "grad_norm": 6.5768866539001465, "learning_rate": 8.517627158306444e-07, "loss": 0.5416, "step": 11692 }, { "epoch": 1.7, "grad_norm": 7.1594414710998535, "learning_rate": 8.516061838419442e-07, "loss": 0.5853, "step": 11693 }, { "epoch": 1.7, "grad_norm": 7.004317760467529, "learning_rate": 8.5144965557101e-07, "loss": 0.5587, "step": 11694 }, { "epoch": 1.7, "grad_norm": 6.915440559387207, "learning_rate": 8.51293131021763e-07, "loss": 0.542, "step": 11695 }, { "epoch": 1.7, "grad_norm": 7.062557220458984, "learning_rate": 8.511366101981255e-07, "loss": 0.4914, "step": 11696 }, { "epoch": 1.7, "grad_norm": 6.9746174812316895, "learning_rate": 8.509800931040179e-07, "loss": 0.5415, "step": 11697 }, { "epoch": 1.7, "grad_norm": 7.266144275665283, "learning_rate": 8.508235797433622e-07, "loss": 0.5125, "step": 11698 }, { "epoch": 1.7, "grad_norm": 5.943436622619629, "learning_rate": 8.50667070120079e-07, "loss": 0.471, "step": 11699 }, { "epoch": 1.7, "grad_norm": 6.9580559730529785, "learning_rate": 8.505105642380901e-07, "loss": 0.5775, "step": 11700 }, { "epoch": 1.7, "grad_norm": 6.843137741088867, "learning_rate": 8.50354062101316e-07, "loss": 0.538, "step": 11701 }, { "epoch": 1.7, "grad_norm": 7.324388027191162, "learning_rate": 8.501975637136779e-07, "loss": 0.4685, "step": 11702 }, { "epoch": 1.7, "grad_norm": 6.425405979156494, "learning_rate": 8.50041069079096e-07, "loss": 0.475, "step": 11703 }, { "epoch": 1.7, "grad_norm": 6.160481929779053, "learning_rate": 8.498845782014918e-07, "loss": 0.5141, "step": 11704 }, { "epoch": 1.7, "grad_norm": 6.591779708862305, "learning_rate": 8.497280910847853e-07, "loss": 0.6188, "step": 11705 }, { "epoch": 1.7, "grad_norm": 6.493879318237305, "learning_rate": 8.495716077328976e-07, "loss": 0.5356, "step": 11706 }, { "epoch": 1.7, "grad_norm": 5.989504337310791, "learning_rate": 8.494151281497488e-07, "loss": 0.536, "step": 11707 }, { "epoch": 1.7, "grad_norm": 6.667463302612305, "learning_rate": 8.49258652339259e-07, "loss": 0.5778, "step": 11708 }, { "epoch": 1.7, "grad_norm": 7.408315181732178, "learning_rate": 8.49102180305349e-07, "loss": 0.5862, "step": 11709 }, { "epoch": 1.7, "grad_norm": 6.472143650054932, "learning_rate": 8.489457120519388e-07, "loss": 0.5441, "step": 11710 }, { "epoch": 1.7, "grad_norm": 5.6680426597595215, "learning_rate": 8.487892475829481e-07, "loss": 0.5129, "step": 11711 }, { "epoch": 1.7, "grad_norm": 6.586246967315674, "learning_rate": 8.486327869022973e-07, "loss": 0.4949, "step": 11712 }, { "epoch": 1.7, "grad_norm": 7.523151397705078, "learning_rate": 8.484763300139058e-07, "loss": 0.5834, "step": 11713 }, { "epoch": 1.7, "grad_norm": 6.675732612609863, "learning_rate": 8.48319876921694e-07, "loss": 0.5495, "step": 11714 }, { "epoch": 1.7, "grad_norm": 5.910933971405029, "learning_rate": 8.48163427629581e-07, "loss": 0.5735, "step": 11715 }, { "epoch": 1.7, "grad_norm": 6.43829870223999, "learning_rate": 8.480069821414869e-07, "loss": 0.4972, "step": 11716 }, { "epoch": 1.7, "grad_norm": 6.915542125701904, "learning_rate": 8.478505404613305e-07, "loss": 0.5629, "step": 11717 }, { "epoch": 1.7, "grad_norm": 6.0646209716796875, "learning_rate": 8.47694102593032e-07, "loss": 0.5284, "step": 11718 }, { "epoch": 1.7, "grad_norm": 6.832338333129883, "learning_rate": 8.475376685405098e-07, "loss": 0.5647, "step": 11719 }, { "epoch": 1.7, "grad_norm": 7.439298152923584, "learning_rate": 8.473812383076835e-07, "loss": 0.679, "step": 11720 }, { "epoch": 1.7, "grad_norm": 6.630780220031738, "learning_rate": 8.472248118984729e-07, "loss": 0.5795, "step": 11721 }, { "epoch": 1.7, "grad_norm": 6.87608528137207, "learning_rate": 8.47068389316796e-07, "loss": 0.5601, "step": 11722 }, { "epoch": 1.7, "grad_norm": 6.243007659912109, "learning_rate": 8.469119705665726e-07, "loss": 0.5867, "step": 11723 }, { "epoch": 1.7, "grad_norm": 6.4831223487854, "learning_rate": 8.467555556517204e-07, "loss": 0.5141, "step": 11724 }, { "epoch": 1.7, "grad_norm": 6.800744533538818, "learning_rate": 8.465991445761593e-07, "loss": 0.6235, "step": 11725 }, { "epoch": 1.7, "grad_norm": 5.975419998168945, "learning_rate": 8.464427373438067e-07, "loss": 0.4989, "step": 11726 }, { "epoch": 1.7, "grad_norm": 5.901560306549072, "learning_rate": 8.462863339585827e-07, "loss": 0.4826, "step": 11727 }, { "epoch": 1.7, "grad_norm": 6.701052665710449, "learning_rate": 8.461299344244042e-07, "loss": 0.5241, "step": 11728 }, { "epoch": 1.7, "grad_norm": 6.594570159912109, "learning_rate": 8.459735387451904e-07, "loss": 0.5579, "step": 11729 }, { "epoch": 1.7, "grad_norm": 6.32706880569458, "learning_rate": 8.458171469248592e-07, "loss": 0.5394, "step": 11730 }, { "epoch": 1.7, "grad_norm": 6.319578647613525, "learning_rate": 8.456607589673293e-07, "loss": 0.521, "step": 11731 }, { "epoch": 1.7, "grad_norm": 6.114192008972168, "learning_rate": 8.455043748765176e-07, "loss": 0.5001, "step": 11732 }, { "epoch": 1.7, "grad_norm": 7.343868255615234, "learning_rate": 8.453479946563433e-07, "loss": 0.5058, "step": 11733 }, { "epoch": 1.7, "grad_norm": 6.120233535766602, "learning_rate": 8.451916183107233e-07, "loss": 0.5217, "step": 11734 }, { "epoch": 1.7, "grad_norm": 6.9455437660217285, "learning_rate": 8.450352458435762e-07, "loss": 0.5853, "step": 11735 }, { "epoch": 1.7, "grad_norm": 6.593429088592529, "learning_rate": 8.44878877258819e-07, "loss": 0.5794, "step": 11736 }, { "epoch": 1.7, "grad_norm": 6.491211891174316, "learning_rate": 8.447225125603698e-07, "loss": 0.5954, "step": 11737 }, { "epoch": 1.7, "grad_norm": 6.13824987411499, "learning_rate": 8.445661517521452e-07, "loss": 0.5077, "step": 11738 }, { "epoch": 1.7, "grad_norm": 6.8394317626953125, "learning_rate": 8.444097948380635e-07, "loss": 0.5036, "step": 11739 }, { "epoch": 1.7, "grad_norm": 7.180338382720947, "learning_rate": 8.442534418220411e-07, "loss": 0.5484, "step": 11740 }, { "epoch": 1.7, "grad_norm": 7.077279567718506, "learning_rate": 8.440970927079963e-07, "loss": 0.4658, "step": 11741 }, { "epoch": 1.7, "grad_norm": 7.712325572967529, "learning_rate": 8.43940747499845e-07, "loss": 0.6662, "step": 11742 }, { "epoch": 1.7, "grad_norm": 5.891168594360352, "learning_rate": 8.437844062015052e-07, "loss": 0.469, "step": 11743 }, { "epoch": 1.7, "grad_norm": 6.4722185134887695, "learning_rate": 8.436280688168929e-07, "loss": 0.4367, "step": 11744 }, { "epoch": 1.7, "grad_norm": 6.797786235809326, "learning_rate": 8.434717353499252e-07, "loss": 0.5834, "step": 11745 }, { "epoch": 1.7, "grad_norm": 6.760260105133057, "learning_rate": 8.433154058045191e-07, "loss": 0.5702, "step": 11746 }, { "epoch": 1.7, "grad_norm": 7.873418807983398, "learning_rate": 8.431590801845906e-07, "loss": 0.5878, "step": 11747 }, { "epoch": 1.7, "grad_norm": 6.539679050445557, "learning_rate": 8.43002758494057e-07, "loss": 0.5226, "step": 11748 }, { "epoch": 1.7, "grad_norm": 6.402670860290527, "learning_rate": 8.428464407368339e-07, "loss": 0.5116, "step": 11749 }, { "epoch": 1.7, "grad_norm": 8.501082420349121, "learning_rate": 8.426901269168381e-07, "loss": 0.6426, "step": 11750 }, { "epoch": 1.71, "grad_norm": 6.701398849487305, "learning_rate": 8.425338170379851e-07, "loss": 0.5753, "step": 11751 }, { "epoch": 1.71, "grad_norm": 6.799511432647705, "learning_rate": 8.42377511104192e-07, "loss": 0.509, "step": 11752 }, { "epoch": 1.71, "grad_norm": 6.495810031890869, "learning_rate": 8.422212091193736e-07, "loss": 0.5439, "step": 11753 }, { "epoch": 1.71, "grad_norm": 6.0484795570373535, "learning_rate": 8.420649110874472e-07, "loss": 0.5138, "step": 11754 }, { "epoch": 1.71, "grad_norm": 6.0171284675598145, "learning_rate": 8.419086170123271e-07, "loss": 0.5863, "step": 11755 }, { "epoch": 1.71, "grad_norm": 6.942042350769043, "learning_rate": 8.417523268979303e-07, "loss": 0.6108, "step": 11756 }, { "epoch": 1.71, "grad_norm": 6.84226131439209, "learning_rate": 8.415960407481717e-07, "loss": 0.5304, "step": 11757 }, { "epoch": 1.71, "grad_norm": 7.030750274658203, "learning_rate": 8.41439758566967e-07, "loss": 0.5376, "step": 11758 }, { "epoch": 1.71, "grad_norm": 6.414828300476074, "learning_rate": 8.412834803582312e-07, "loss": 0.4646, "step": 11759 }, { "epoch": 1.71, "grad_norm": 7.126876354217529, "learning_rate": 8.411272061258802e-07, "loss": 0.6282, "step": 11760 }, { "epoch": 1.71, "grad_norm": 6.347472190856934, "learning_rate": 8.409709358738285e-07, "loss": 0.5098, "step": 11761 }, { "epoch": 1.71, "grad_norm": 6.195860862731934, "learning_rate": 8.408146696059922e-07, "loss": 0.4848, "step": 11762 }, { "epoch": 1.71, "grad_norm": 6.667906284332275, "learning_rate": 8.406584073262852e-07, "loss": 0.4896, "step": 11763 }, { "epoch": 1.71, "grad_norm": 5.6557464599609375, "learning_rate": 8.40502149038623e-07, "loss": 0.5213, "step": 11764 }, { "epoch": 1.71, "grad_norm": 7.663573741912842, "learning_rate": 8.403458947469204e-07, "loss": 0.6035, "step": 11765 }, { "epoch": 1.71, "grad_norm": 6.157219886779785, "learning_rate": 8.401896444550922e-07, "loss": 0.5202, "step": 11766 }, { "epoch": 1.71, "grad_norm": 7.073161602020264, "learning_rate": 8.400333981670524e-07, "loss": 0.5504, "step": 11767 }, { "epoch": 1.71, "grad_norm": 7.462565898895264, "learning_rate": 8.398771558867163e-07, "loss": 0.5301, "step": 11768 }, { "epoch": 1.71, "grad_norm": 6.865884304046631, "learning_rate": 8.397209176179973e-07, "loss": 0.5, "step": 11769 }, { "epoch": 1.71, "grad_norm": 6.4802565574646, "learning_rate": 8.395646833648109e-07, "loss": 0.5288, "step": 11770 }, { "epoch": 1.71, "grad_norm": 7.069265842437744, "learning_rate": 8.394084531310703e-07, "loss": 0.5633, "step": 11771 }, { "epoch": 1.71, "grad_norm": 7.242343425750732, "learning_rate": 8.392522269206898e-07, "loss": 0.5816, "step": 11772 }, { "epoch": 1.71, "grad_norm": 6.59969425201416, "learning_rate": 8.390960047375842e-07, "loss": 0.4966, "step": 11773 }, { "epoch": 1.71, "grad_norm": 7.221831798553467, "learning_rate": 8.389397865856661e-07, "loss": 0.5507, "step": 11774 }, { "epoch": 1.71, "grad_norm": 7.892425060272217, "learning_rate": 8.387835724688506e-07, "loss": 0.5668, "step": 11775 }, { "epoch": 1.71, "grad_norm": 5.799147605895996, "learning_rate": 8.386273623910503e-07, "loss": 0.4798, "step": 11776 }, { "epoch": 1.71, "grad_norm": 5.66665506362915, "learning_rate": 8.384711563561792e-07, "loss": 0.4616, "step": 11777 }, { "epoch": 1.71, "grad_norm": 7.174037456512451, "learning_rate": 8.38314954368151e-07, "loss": 0.5928, "step": 11778 }, { "epoch": 1.71, "grad_norm": 6.941038608551025, "learning_rate": 8.381587564308791e-07, "loss": 0.4883, "step": 11779 }, { "epoch": 1.71, "grad_norm": 6.220743656158447, "learning_rate": 8.380025625482762e-07, "loss": 0.4701, "step": 11780 }, { "epoch": 1.71, "grad_norm": 6.235530853271484, "learning_rate": 8.378463727242562e-07, "loss": 0.5686, "step": 11781 }, { "epoch": 1.71, "grad_norm": 6.221225738525391, "learning_rate": 8.376901869627314e-07, "loss": 0.5526, "step": 11782 }, { "epoch": 1.71, "grad_norm": 5.941326141357422, "learning_rate": 8.375340052676158e-07, "loss": 0.4877, "step": 11783 }, { "epoch": 1.71, "grad_norm": 6.862452507019043, "learning_rate": 8.373778276428214e-07, "loss": 0.5459, "step": 11784 }, { "epoch": 1.71, "grad_norm": 5.912275314331055, "learning_rate": 8.372216540922616e-07, "loss": 0.4695, "step": 11785 }, { "epoch": 1.71, "grad_norm": 7.146899700164795, "learning_rate": 8.370654846198482e-07, "loss": 0.6, "step": 11786 }, { "epoch": 1.71, "grad_norm": 6.6916399002075195, "learning_rate": 8.369093192294949e-07, "loss": 0.5295, "step": 11787 }, { "epoch": 1.71, "grad_norm": 6.848529815673828, "learning_rate": 8.367531579251132e-07, "loss": 0.6341, "step": 11788 }, { "epoch": 1.71, "grad_norm": 6.4584197998046875, "learning_rate": 8.365970007106163e-07, "loss": 0.5273, "step": 11789 }, { "epoch": 1.71, "grad_norm": 5.891178607940674, "learning_rate": 8.364408475899153e-07, "loss": 0.4882, "step": 11790 }, { "epoch": 1.71, "grad_norm": 5.56870174407959, "learning_rate": 8.362846985669239e-07, "loss": 0.5171, "step": 11791 }, { "epoch": 1.71, "grad_norm": 6.150809288024902, "learning_rate": 8.361285536455531e-07, "loss": 0.4895, "step": 11792 }, { "epoch": 1.71, "grad_norm": 6.56540060043335, "learning_rate": 8.359724128297154e-07, "loss": 0.5832, "step": 11793 }, { "epoch": 1.71, "grad_norm": 7.531248092651367, "learning_rate": 8.358162761233217e-07, "loss": 0.5852, "step": 11794 }, { "epoch": 1.71, "grad_norm": 5.6502532958984375, "learning_rate": 8.35660143530285e-07, "loss": 0.492, "step": 11795 }, { "epoch": 1.71, "grad_norm": 5.951094627380371, "learning_rate": 8.35504015054516e-07, "loss": 0.5427, "step": 11796 }, { "epoch": 1.71, "grad_norm": 5.895692825317383, "learning_rate": 8.353478906999269e-07, "loss": 0.4889, "step": 11797 }, { "epoch": 1.71, "grad_norm": 6.580079078674316, "learning_rate": 8.351917704704288e-07, "loss": 0.5232, "step": 11798 }, { "epoch": 1.71, "grad_norm": 7.292557239532471, "learning_rate": 8.35035654369933e-07, "loss": 0.5411, "step": 11799 }, { "epoch": 1.71, "grad_norm": 6.7112956047058105, "learning_rate": 8.348795424023511e-07, "loss": 0.5888, "step": 11800 }, { "epoch": 1.71, "grad_norm": 6.287404537200928, "learning_rate": 8.347234345715935e-07, "loss": 0.5561, "step": 11801 }, { "epoch": 1.71, "grad_norm": 7.426023006439209, "learning_rate": 8.345673308815723e-07, "loss": 0.489, "step": 11802 }, { "epoch": 1.71, "grad_norm": 6.157516956329346, "learning_rate": 8.344112313361973e-07, "loss": 0.4957, "step": 11803 }, { "epoch": 1.71, "grad_norm": 7.013833045959473, "learning_rate": 8.342551359393802e-07, "loss": 0.5441, "step": 11804 }, { "epoch": 1.71, "grad_norm": 6.333244323730469, "learning_rate": 8.340990446950312e-07, "loss": 0.5646, "step": 11805 }, { "epoch": 1.71, "grad_norm": 6.460759162902832, "learning_rate": 8.339429576070614e-07, "loss": 0.5307, "step": 11806 }, { "epoch": 1.71, "grad_norm": 5.659340858459473, "learning_rate": 8.337868746793803e-07, "loss": 0.5126, "step": 11807 }, { "epoch": 1.71, "grad_norm": 6.533505439758301, "learning_rate": 8.336307959158996e-07, "loss": 0.4674, "step": 11808 }, { "epoch": 1.71, "grad_norm": 7.170180797576904, "learning_rate": 8.334747213205285e-07, "loss": 0.5067, "step": 11809 }, { "epoch": 1.71, "grad_norm": 6.473724365234375, "learning_rate": 8.333186508971783e-07, "loss": 0.5172, "step": 11810 }, { "epoch": 1.71, "grad_norm": 6.583017826080322, "learning_rate": 8.331625846497578e-07, "loss": 0.5393, "step": 11811 }, { "epoch": 1.71, "grad_norm": 7.127116680145264, "learning_rate": 8.330065225821782e-07, "loss": 0.5233, "step": 11812 }, { "epoch": 1.71, "grad_norm": 6.305294990539551, "learning_rate": 8.328504646983486e-07, "loss": 0.5332, "step": 11813 }, { "epoch": 1.71, "grad_norm": 6.0927934646606445, "learning_rate": 8.326944110021791e-07, "loss": 0.5058, "step": 11814 }, { "epoch": 1.71, "grad_norm": 6.49382209777832, "learning_rate": 8.325383614975791e-07, "loss": 0.5476, "step": 11815 }, { "epoch": 1.71, "grad_norm": 6.823755741119385, "learning_rate": 8.323823161884587e-07, "loss": 0.5546, "step": 11816 }, { "epoch": 1.71, "grad_norm": 7.835072040557861, "learning_rate": 8.322262750787266e-07, "loss": 0.5146, "step": 11817 }, { "epoch": 1.71, "grad_norm": 6.513607978820801, "learning_rate": 8.320702381722929e-07, "loss": 0.4746, "step": 11818 }, { "epoch": 1.71, "grad_norm": 6.776735782623291, "learning_rate": 8.319142054730664e-07, "loss": 0.5912, "step": 11819 }, { "epoch": 1.72, "grad_norm": 6.39315128326416, "learning_rate": 8.317581769849564e-07, "loss": 0.5565, "step": 11820 }, { "epoch": 1.72, "grad_norm": 6.9900712966918945, "learning_rate": 8.316021527118719e-07, "loss": 0.5014, "step": 11821 }, { "epoch": 1.72, "grad_norm": 6.139187812805176, "learning_rate": 8.31446132657722e-07, "loss": 0.4941, "step": 11822 }, { "epoch": 1.72, "grad_norm": 7.1220011711120605, "learning_rate": 8.312901168264149e-07, "loss": 0.6216, "step": 11823 }, { "epoch": 1.72, "grad_norm": 6.56021785736084, "learning_rate": 8.311341052218599e-07, "loss": 0.5826, "step": 11824 }, { "epoch": 1.72, "grad_norm": 7.317564487457275, "learning_rate": 8.309780978479659e-07, "loss": 0.5879, "step": 11825 }, { "epoch": 1.72, "grad_norm": 7.251960277557373, "learning_rate": 8.308220947086408e-07, "loss": 0.5831, "step": 11826 }, { "epoch": 1.72, "grad_norm": 7.560068607330322, "learning_rate": 8.306660958077936e-07, "loss": 0.5711, "step": 11827 }, { "epoch": 1.72, "grad_norm": 6.2864861488342285, "learning_rate": 8.305101011493316e-07, "loss": 0.5755, "step": 11828 }, { "epoch": 1.72, "grad_norm": 6.833716869354248, "learning_rate": 8.303541107371641e-07, "loss": 0.5484, "step": 11829 }, { "epoch": 1.72, "grad_norm": 7.200793743133545, "learning_rate": 8.301981245751983e-07, "loss": 0.5363, "step": 11830 }, { "epoch": 1.72, "grad_norm": 6.470170021057129, "learning_rate": 8.30042142667343e-07, "loss": 0.5456, "step": 11831 }, { "epoch": 1.72, "grad_norm": 7.485605716705322, "learning_rate": 8.298861650175055e-07, "loss": 0.5104, "step": 11832 }, { "epoch": 1.72, "grad_norm": 6.956447124481201, "learning_rate": 8.297301916295938e-07, "loss": 0.6155, "step": 11833 }, { "epoch": 1.72, "grad_norm": 6.487356185913086, "learning_rate": 8.295742225075153e-07, "loss": 0.4758, "step": 11834 }, { "epoch": 1.72, "grad_norm": 6.133538722991943, "learning_rate": 8.294182576551782e-07, "loss": 0.5331, "step": 11835 }, { "epoch": 1.72, "grad_norm": 6.379356861114502, "learning_rate": 8.292622970764888e-07, "loss": 0.4725, "step": 11836 }, { "epoch": 1.72, "grad_norm": 5.84912633895874, "learning_rate": 8.291063407753557e-07, "loss": 0.4392, "step": 11837 }, { "epoch": 1.72, "grad_norm": 7.478769779205322, "learning_rate": 8.28950388755685e-07, "loss": 0.5767, "step": 11838 }, { "epoch": 1.72, "grad_norm": 6.453232765197754, "learning_rate": 8.28794441021385e-07, "loss": 0.4369, "step": 11839 }, { "epoch": 1.72, "grad_norm": 7.2188496589660645, "learning_rate": 8.286384975763619e-07, "loss": 0.6031, "step": 11840 }, { "epoch": 1.72, "grad_norm": 7.289501190185547, "learning_rate": 8.284825584245228e-07, "loss": 0.571, "step": 11841 }, { "epoch": 1.72, "grad_norm": 5.91445779800415, "learning_rate": 8.283266235697743e-07, "loss": 0.4978, "step": 11842 }, { "epoch": 1.72, "grad_norm": 6.071147441864014, "learning_rate": 8.281706930160235e-07, "loss": 0.474, "step": 11843 }, { "epoch": 1.72, "grad_norm": 5.90602445602417, "learning_rate": 8.280147667671765e-07, "loss": 0.4885, "step": 11844 }, { "epoch": 1.72, "grad_norm": 6.409290313720703, "learning_rate": 8.278588448271405e-07, "loss": 0.5069, "step": 11845 }, { "epoch": 1.72, "grad_norm": 6.377934455871582, "learning_rate": 8.277029271998211e-07, "loss": 0.5708, "step": 11846 }, { "epoch": 1.72, "grad_norm": 6.458895683288574, "learning_rate": 8.275470138891252e-07, "loss": 0.5319, "step": 11847 }, { "epoch": 1.72, "grad_norm": 6.410774230957031, "learning_rate": 8.273911048989585e-07, "loss": 0.5083, "step": 11848 }, { "epoch": 1.72, "grad_norm": 6.762923717498779, "learning_rate": 8.27235200233227e-07, "loss": 0.529, "step": 11849 }, { "epoch": 1.72, "grad_norm": 6.652627468109131, "learning_rate": 8.270792998958373e-07, "loss": 0.5197, "step": 11850 }, { "epoch": 1.72, "grad_norm": 6.160274505615234, "learning_rate": 8.269234038906943e-07, "loss": 0.512, "step": 11851 }, { "epoch": 1.72, "grad_norm": 5.875275135040283, "learning_rate": 8.267675122217046e-07, "loss": 0.4907, "step": 11852 }, { "epoch": 1.72, "grad_norm": 6.53841495513916, "learning_rate": 8.266116248927732e-07, "loss": 0.5715, "step": 11853 }, { "epoch": 1.72, "grad_norm": 6.0695037841796875, "learning_rate": 8.264557419078061e-07, "loss": 0.5088, "step": 11854 }, { "epoch": 1.72, "grad_norm": 6.0196919441223145, "learning_rate": 8.262998632707079e-07, "loss": 0.4849, "step": 11855 }, { "epoch": 1.72, "grad_norm": 6.874877452850342, "learning_rate": 8.26143988985385e-07, "loss": 0.5231, "step": 11856 }, { "epoch": 1.72, "grad_norm": 6.066929340362549, "learning_rate": 8.259881190557412e-07, "loss": 0.4608, "step": 11857 }, { "epoch": 1.72, "grad_norm": 7.182775020599365, "learning_rate": 8.258322534856831e-07, "loss": 0.6047, "step": 11858 }, { "epoch": 1.72, "grad_norm": 6.807814598083496, "learning_rate": 8.256763922791143e-07, "loss": 0.5222, "step": 11859 }, { "epoch": 1.72, "grad_norm": 6.338559150695801, "learning_rate": 8.255205354399407e-07, "loss": 0.4578, "step": 11860 }, { "epoch": 1.72, "grad_norm": 7.078824043273926, "learning_rate": 8.253646829720664e-07, "loss": 0.5569, "step": 11861 }, { "epoch": 1.72, "grad_norm": 6.173571586608887, "learning_rate": 8.252088348793964e-07, "loss": 0.5936, "step": 11862 }, { "epoch": 1.72, "grad_norm": 6.522387981414795, "learning_rate": 8.250529911658346e-07, "loss": 0.5346, "step": 11863 }, { "epoch": 1.72, "grad_norm": 5.9513092041015625, "learning_rate": 8.248971518352863e-07, "loss": 0.5066, "step": 11864 }, { "epoch": 1.72, "grad_norm": 7.77631139755249, "learning_rate": 8.247413168916548e-07, "loss": 0.5604, "step": 11865 }, { "epoch": 1.72, "grad_norm": 6.52292013168335, "learning_rate": 8.245854863388455e-07, "loss": 0.5058, "step": 11866 }, { "epoch": 1.72, "grad_norm": 6.575742721557617, "learning_rate": 8.244296601807614e-07, "loss": 0.5122, "step": 11867 }, { "epoch": 1.72, "grad_norm": 6.390450954437256, "learning_rate": 8.24273838421307e-07, "loss": 0.5056, "step": 11868 }, { "epoch": 1.72, "grad_norm": 7.289604663848877, "learning_rate": 8.24118021064386e-07, "loss": 0.5258, "step": 11869 }, { "epoch": 1.72, "grad_norm": 6.192044258117676, "learning_rate": 8.239622081139025e-07, "loss": 0.5364, "step": 11870 }, { "epoch": 1.72, "grad_norm": 6.9508490562438965, "learning_rate": 8.238063995737593e-07, "loss": 0.6195, "step": 11871 }, { "epoch": 1.72, "grad_norm": 6.485160827636719, "learning_rate": 8.236505954478609e-07, "loss": 0.5684, "step": 11872 }, { "epoch": 1.72, "grad_norm": 6.457573413848877, "learning_rate": 8.234947957401101e-07, "loss": 0.4521, "step": 11873 }, { "epoch": 1.72, "grad_norm": 6.620335102081299, "learning_rate": 8.233390004544107e-07, "loss": 0.5702, "step": 11874 }, { "epoch": 1.72, "grad_norm": 6.492233753204346, "learning_rate": 8.231832095946653e-07, "loss": 0.5802, "step": 11875 }, { "epoch": 1.72, "grad_norm": 7.183796405792236, "learning_rate": 8.23027423164777e-07, "loss": 0.5687, "step": 11876 }, { "epoch": 1.72, "grad_norm": 6.3726935386657715, "learning_rate": 8.228716411686496e-07, "loss": 0.5246, "step": 11877 }, { "epoch": 1.72, "grad_norm": 7.3755059242248535, "learning_rate": 8.227158636101851e-07, "loss": 0.5368, "step": 11878 }, { "epoch": 1.72, "grad_norm": 6.662134170532227, "learning_rate": 8.225600904932869e-07, "loss": 0.5183, "step": 11879 }, { "epoch": 1.72, "grad_norm": 7.64225959777832, "learning_rate": 8.22404321821857e-07, "loss": 0.6467, "step": 11880 }, { "epoch": 1.72, "grad_norm": 6.88683557510376, "learning_rate": 8.222485575997985e-07, "loss": 0.5099, "step": 11881 }, { "epoch": 1.72, "grad_norm": 6.440598487854004, "learning_rate": 8.220927978310135e-07, "loss": 0.5326, "step": 11882 }, { "epoch": 1.72, "grad_norm": 6.678281784057617, "learning_rate": 8.219370425194046e-07, "loss": 0.4859, "step": 11883 }, { "epoch": 1.72, "grad_norm": 6.4313530921936035, "learning_rate": 8.217812916688732e-07, "loss": 0.5472, "step": 11884 }, { "epoch": 1.72, "grad_norm": 6.622872352600098, "learning_rate": 8.216255452833225e-07, "loss": 0.5525, "step": 11885 }, { "epoch": 1.72, "grad_norm": 6.643098831176758, "learning_rate": 8.214698033666535e-07, "loss": 0.5834, "step": 11886 }, { "epoch": 1.72, "grad_norm": 6.794183731079102, "learning_rate": 8.213140659227689e-07, "loss": 0.5421, "step": 11887 }, { "epoch": 1.72, "grad_norm": 7.480495452880859, "learning_rate": 8.211583329555698e-07, "loss": 0.5145, "step": 11888 }, { "epoch": 1.73, "grad_norm": 7.633810043334961, "learning_rate": 8.210026044689582e-07, "loss": 0.5891, "step": 11889 }, { "epoch": 1.73, "grad_norm": 6.724778175354004, "learning_rate": 8.208468804668354e-07, "loss": 0.5021, "step": 11890 }, { "epoch": 1.73, "grad_norm": 6.688288688659668, "learning_rate": 8.206911609531032e-07, "loss": 0.5406, "step": 11891 }, { "epoch": 1.73, "grad_norm": 6.748711585998535, "learning_rate": 8.205354459316621e-07, "loss": 0.6181, "step": 11892 }, { "epoch": 1.73, "grad_norm": 6.281558990478516, "learning_rate": 8.203797354064143e-07, "loss": 0.5649, "step": 11893 }, { "epoch": 1.73, "grad_norm": 6.748067378997803, "learning_rate": 8.202240293812599e-07, "loss": 0.5094, "step": 11894 }, { "epoch": 1.73, "grad_norm": 6.055753231048584, "learning_rate": 8.200683278601008e-07, "loss": 0.506, "step": 11895 }, { "epoch": 1.73, "grad_norm": 7.188993453979492, "learning_rate": 8.199126308468372e-07, "loss": 0.5616, "step": 11896 }, { "epoch": 1.73, "grad_norm": 6.81039571762085, "learning_rate": 8.197569383453702e-07, "loss": 0.595, "step": 11897 }, { "epoch": 1.73, "grad_norm": 6.685601234436035, "learning_rate": 8.196012503595999e-07, "loss": 0.5971, "step": 11898 }, { "epoch": 1.73, "grad_norm": 6.677687168121338, "learning_rate": 8.194455668934274e-07, "loss": 0.5788, "step": 11899 }, { "epoch": 1.73, "grad_norm": 6.691506862640381, "learning_rate": 8.192898879507526e-07, "loss": 0.6137, "step": 11900 }, { "epoch": 1.73, "grad_norm": 7.608061790466309, "learning_rate": 8.191342135354764e-07, "loss": 0.521, "step": 11901 }, { "epoch": 1.73, "grad_norm": 6.307859897613525, "learning_rate": 8.189785436514983e-07, "loss": 0.5226, "step": 11902 }, { "epoch": 1.73, "grad_norm": 6.743648052215576, "learning_rate": 8.188228783027187e-07, "loss": 0.4712, "step": 11903 }, { "epoch": 1.73, "grad_norm": 6.14378023147583, "learning_rate": 8.186672174930377e-07, "loss": 0.6054, "step": 11904 }, { "epoch": 1.73, "grad_norm": 6.29899263381958, "learning_rate": 8.185115612263546e-07, "loss": 0.5184, "step": 11905 }, { "epoch": 1.73, "grad_norm": 6.133598804473877, "learning_rate": 8.183559095065698e-07, "loss": 0.5098, "step": 11906 }, { "epoch": 1.73, "grad_norm": 6.048449993133545, "learning_rate": 8.182002623375821e-07, "loss": 0.5356, "step": 11907 }, { "epoch": 1.73, "grad_norm": 6.753785610198975, "learning_rate": 8.180446197232919e-07, "loss": 0.5977, "step": 11908 }, { "epoch": 1.73, "grad_norm": 6.149734020233154, "learning_rate": 8.17888981667598e-07, "loss": 0.5076, "step": 11909 }, { "epoch": 1.73, "grad_norm": 6.158999443054199, "learning_rate": 8.177333481743998e-07, "loss": 0.5595, "step": 11910 }, { "epoch": 1.73, "grad_norm": 6.4629621505737305, "learning_rate": 8.175777192475961e-07, "loss": 0.5813, "step": 11911 }, { "epoch": 1.73, "grad_norm": 7.569433689117432, "learning_rate": 8.174220948910867e-07, "loss": 0.5564, "step": 11912 }, { "epoch": 1.73, "grad_norm": 6.371335029602051, "learning_rate": 8.172664751087695e-07, "loss": 0.5537, "step": 11913 }, { "epoch": 1.73, "grad_norm": 6.6441168785095215, "learning_rate": 8.171108599045445e-07, "loss": 0.479, "step": 11914 }, { "epoch": 1.73, "grad_norm": 6.06588888168335, "learning_rate": 8.16955249282309e-07, "loss": 0.5805, "step": 11915 }, { "epoch": 1.73, "grad_norm": 6.37007999420166, "learning_rate": 8.167996432459629e-07, "loss": 0.5596, "step": 11916 }, { "epoch": 1.73, "grad_norm": 6.670806884765625, "learning_rate": 8.166440417994039e-07, "loss": 0.5302, "step": 11917 }, { "epoch": 1.73, "grad_norm": 6.651034832000732, "learning_rate": 8.164884449465308e-07, "loss": 0.4675, "step": 11918 }, { "epoch": 1.73, "grad_norm": 6.460810661315918, "learning_rate": 8.16332852691241e-07, "loss": 0.5323, "step": 11919 }, { "epoch": 1.73, "grad_norm": 5.721413612365723, "learning_rate": 8.161772650374335e-07, "loss": 0.529, "step": 11920 }, { "epoch": 1.73, "grad_norm": 6.2413811683654785, "learning_rate": 8.160216819890056e-07, "loss": 0.5551, "step": 11921 }, { "epoch": 1.73, "grad_norm": 6.678204536437988, "learning_rate": 8.15866103549856e-07, "loss": 0.5912, "step": 11922 }, { "epoch": 1.73, "grad_norm": 6.475175380706787, "learning_rate": 8.157105297238816e-07, "loss": 0.4708, "step": 11923 }, { "epoch": 1.73, "grad_norm": 6.878094673156738, "learning_rate": 8.155549605149805e-07, "loss": 0.4994, "step": 11924 }, { "epoch": 1.73, "grad_norm": 6.247983455657959, "learning_rate": 8.153993959270503e-07, "loss": 0.5544, "step": 11925 }, { "epoch": 1.73, "grad_norm": 6.57841682434082, "learning_rate": 8.152438359639884e-07, "loss": 0.5083, "step": 11926 }, { "epoch": 1.73, "grad_norm": 6.303935527801514, "learning_rate": 8.150882806296916e-07, "loss": 0.5382, "step": 11927 }, { "epoch": 1.73, "grad_norm": 6.504343509674072, "learning_rate": 8.149327299280575e-07, "loss": 0.5267, "step": 11928 }, { "epoch": 1.73, "grad_norm": 6.299102306365967, "learning_rate": 8.147771838629836e-07, "loss": 0.4651, "step": 11929 }, { "epoch": 1.73, "grad_norm": 6.19891357421875, "learning_rate": 8.146216424383661e-07, "loss": 0.5609, "step": 11930 }, { "epoch": 1.73, "grad_norm": 6.397455215454102, "learning_rate": 8.144661056581023e-07, "loss": 0.4803, "step": 11931 }, { "epoch": 1.73, "grad_norm": 7.060842514038086, "learning_rate": 8.143105735260885e-07, "loss": 0.4947, "step": 11932 }, { "epoch": 1.73, "grad_norm": 6.29411506652832, "learning_rate": 8.141550460462219e-07, "loss": 0.4769, "step": 11933 }, { "epoch": 1.73, "grad_norm": 6.955223083496094, "learning_rate": 8.139995232223983e-07, "loss": 0.5528, "step": 11934 }, { "epoch": 1.73, "grad_norm": 6.108619213104248, "learning_rate": 8.138440050585148e-07, "loss": 0.495, "step": 11935 }, { "epoch": 1.73, "grad_norm": 6.919744491577148, "learning_rate": 8.13688491558467e-07, "loss": 0.5887, "step": 11936 }, { "epoch": 1.73, "grad_norm": 6.426918029785156, "learning_rate": 8.135329827261514e-07, "loss": 0.53, "step": 11937 }, { "epoch": 1.73, "grad_norm": 6.206199645996094, "learning_rate": 8.133774785654639e-07, "loss": 0.4908, "step": 11938 }, { "epoch": 1.73, "grad_norm": 6.639031410217285, "learning_rate": 8.132219790803007e-07, "loss": 0.5524, "step": 11939 }, { "epoch": 1.73, "grad_norm": 6.499629020690918, "learning_rate": 8.130664842745569e-07, "loss": 0.5107, "step": 11940 }, { "epoch": 1.73, "grad_norm": 6.454294204711914, "learning_rate": 8.129109941521291e-07, "loss": 0.5068, "step": 11941 }, { "epoch": 1.73, "grad_norm": 6.492408275604248, "learning_rate": 8.127555087169118e-07, "loss": 0.4751, "step": 11942 }, { "epoch": 1.73, "grad_norm": 6.143373489379883, "learning_rate": 8.126000279728015e-07, "loss": 0.5075, "step": 11943 }, { "epoch": 1.73, "grad_norm": 6.064204216003418, "learning_rate": 8.124445519236928e-07, "loss": 0.5813, "step": 11944 }, { "epoch": 1.73, "grad_norm": 6.9925312995910645, "learning_rate": 8.122890805734812e-07, "loss": 0.5483, "step": 11945 }, { "epoch": 1.73, "grad_norm": 6.3594841957092285, "learning_rate": 8.121336139260614e-07, "loss": 0.5392, "step": 11946 }, { "epoch": 1.73, "grad_norm": 6.57554292678833, "learning_rate": 8.11978151985329e-07, "loss": 0.5242, "step": 11947 }, { "epoch": 1.73, "grad_norm": 5.746985912322998, "learning_rate": 8.118226947551781e-07, "loss": 0.462, "step": 11948 }, { "epoch": 1.73, "grad_norm": 6.104576587677002, "learning_rate": 8.116672422395042e-07, "loss": 0.4952, "step": 11949 }, { "epoch": 1.73, "grad_norm": 6.2671732902526855, "learning_rate": 8.115117944422012e-07, "loss": 0.5887, "step": 11950 }, { "epoch": 1.73, "grad_norm": 5.945054054260254, "learning_rate": 8.113563513671643e-07, "loss": 0.4523, "step": 11951 }, { "epoch": 1.73, "grad_norm": 6.749773025512695, "learning_rate": 8.112009130182873e-07, "loss": 0.5795, "step": 11952 }, { "epoch": 1.73, "grad_norm": 7.004549503326416, "learning_rate": 8.110454793994649e-07, "loss": 0.5366, "step": 11953 }, { "epoch": 1.73, "grad_norm": 6.877803802490234, "learning_rate": 8.108900505145905e-07, "loss": 0.5361, "step": 11954 }, { "epoch": 1.73, "grad_norm": 7.188821315765381, "learning_rate": 8.107346263675587e-07, "loss": 0.586, "step": 11955 }, { "epoch": 1.73, "grad_norm": 7.406158924102783, "learning_rate": 8.105792069622637e-07, "loss": 0.538, "step": 11956 }, { "epoch": 1.73, "grad_norm": 5.786779880523682, "learning_rate": 8.104237923025986e-07, "loss": 0.4788, "step": 11957 }, { "epoch": 1.74, "grad_norm": 6.370655059814453, "learning_rate": 8.102683823924574e-07, "loss": 0.5126, "step": 11958 }, { "epoch": 1.74, "grad_norm": 6.352695465087891, "learning_rate": 8.101129772357337e-07, "loss": 0.588, "step": 11959 }, { "epoch": 1.74, "grad_norm": 6.361622333526611, "learning_rate": 8.09957576836321e-07, "loss": 0.5272, "step": 11960 }, { "epoch": 1.74, "grad_norm": 5.752233982086182, "learning_rate": 8.098021811981119e-07, "loss": 0.5766, "step": 11961 }, { "epoch": 1.74, "grad_norm": 6.580893516540527, "learning_rate": 8.096467903250006e-07, "loss": 0.5272, "step": 11962 }, { "epoch": 1.74, "grad_norm": 7.297673225402832, "learning_rate": 8.094914042208793e-07, "loss": 0.597, "step": 11963 }, { "epoch": 1.74, "grad_norm": 7.057533264160156, "learning_rate": 8.093360228896416e-07, "loss": 0.5646, "step": 11964 }, { "epoch": 1.74, "grad_norm": 7.051435470581055, "learning_rate": 8.091806463351801e-07, "loss": 0.5793, "step": 11965 }, { "epoch": 1.74, "grad_norm": 6.8182902336120605, "learning_rate": 8.090252745613874e-07, "loss": 0.5307, "step": 11966 }, { "epoch": 1.74, "grad_norm": 5.9814453125, "learning_rate": 8.088699075721558e-07, "loss": 0.4986, "step": 11967 }, { "epoch": 1.74, "grad_norm": 6.165930271148682, "learning_rate": 8.087145453713785e-07, "loss": 0.5814, "step": 11968 }, { "epoch": 1.74, "grad_norm": 6.627184867858887, "learning_rate": 8.085591879629471e-07, "loss": 0.4766, "step": 11969 }, { "epoch": 1.74, "grad_norm": 6.713446617126465, "learning_rate": 8.084038353507547e-07, "loss": 0.5903, "step": 11970 }, { "epoch": 1.74, "grad_norm": 5.806267261505127, "learning_rate": 8.082484875386924e-07, "loss": 0.472, "step": 11971 }, { "epoch": 1.74, "grad_norm": 6.326876640319824, "learning_rate": 8.080931445306531e-07, "loss": 0.5398, "step": 11972 }, { "epoch": 1.74, "grad_norm": 6.50249719619751, "learning_rate": 8.079378063305279e-07, "loss": 0.5337, "step": 11973 }, { "epoch": 1.74, "grad_norm": 6.445793628692627, "learning_rate": 8.077824729422092e-07, "loss": 0.5482, "step": 11974 }, { "epoch": 1.74, "grad_norm": 6.765627861022949, "learning_rate": 8.076271443695879e-07, "loss": 0.5042, "step": 11975 }, { "epoch": 1.74, "grad_norm": 7.295193672180176, "learning_rate": 8.074718206165564e-07, "loss": 0.4755, "step": 11976 }, { "epoch": 1.74, "grad_norm": 6.896414279937744, "learning_rate": 8.073165016870051e-07, "loss": 0.5852, "step": 11977 }, { "epoch": 1.74, "grad_norm": 6.600100517272949, "learning_rate": 8.071611875848263e-07, "loss": 0.5993, "step": 11978 }, { "epoch": 1.74, "grad_norm": 6.436877727508545, "learning_rate": 8.070058783139104e-07, "loss": 0.5057, "step": 11979 }, { "epoch": 1.74, "grad_norm": 6.907227516174316, "learning_rate": 8.068505738781484e-07, "loss": 0.4813, "step": 11980 }, { "epoch": 1.74, "grad_norm": 6.135132789611816, "learning_rate": 8.066952742814318e-07, "loss": 0.5223, "step": 11981 }, { "epoch": 1.74, "grad_norm": 6.566888332366943, "learning_rate": 8.065399795276506e-07, "loss": 0.5502, "step": 11982 }, { "epoch": 1.74, "grad_norm": 7.481380939483643, "learning_rate": 8.063846896206963e-07, "loss": 0.5209, "step": 11983 }, { "epoch": 1.74, "grad_norm": 6.638273239135742, "learning_rate": 8.062294045644587e-07, "loss": 0.6161, "step": 11984 }, { "epoch": 1.74, "grad_norm": 6.971890449523926, "learning_rate": 8.060741243628288e-07, "loss": 0.4695, "step": 11985 }, { "epoch": 1.74, "grad_norm": 6.687419414520264, "learning_rate": 8.059188490196964e-07, "loss": 0.5221, "step": 11986 }, { "epoch": 1.74, "grad_norm": 6.9083709716796875, "learning_rate": 8.057635785389522e-07, "loss": 0.5646, "step": 11987 }, { "epoch": 1.74, "grad_norm": 6.237143516540527, "learning_rate": 8.056083129244855e-07, "loss": 0.4498, "step": 11988 }, { "epoch": 1.74, "grad_norm": 7.474064826965332, "learning_rate": 8.054530521801871e-07, "loss": 0.6309, "step": 11989 }, { "epoch": 1.74, "grad_norm": 6.149425983428955, "learning_rate": 8.052977963099459e-07, "loss": 0.4908, "step": 11990 }, { "epoch": 1.74, "grad_norm": 6.827445983886719, "learning_rate": 8.051425453176526e-07, "loss": 0.5173, "step": 11991 }, { "epoch": 1.74, "grad_norm": 6.434248924255371, "learning_rate": 8.049872992071958e-07, "loss": 0.5562, "step": 11992 }, { "epoch": 1.74, "grad_norm": 5.991114139556885, "learning_rate": 8.048320579824654e-07, "loss": 0.525, "step": 11993 }, { "epoch": 1.74, "grad_norm": 7.053271770477295, "learning_rate": 8.046768216473508e-07, "loss": 0.5694, "step": 11994 }, { "epoch": 1.74, "grad_norm": 6.724156379699707, "learning_rate": 8.045215902057412e-07, "loss": 0.5277, "step": 11995 }, { "epoch": 1.74, "grad_norm": 6.787942409515381, "learning_rate": 8.043663636615251e-07, "loss": 0.6192, "step": 11996 }, { "epoch": 1.74, "grad_norm": 6.818273067474365, "learning_rate": 8.042111420185924e-07, "loss": 0.4868, "step": 11997 }, { "epoch": 1.74, "grad_norm": 5.949244499206543, "learning_rate": 8.040559252808309e-07, "loss": 0.5153, "step": 11998 }, { "epoch": 1.74, "grad_norm": 6.422508716583252, "learning_rate": 8.039007134521303e-07, "loss": 0.5377, "step": 11999 }, { "epoch": 1.74, "grad_norm": 6.070014953613281, "learning_rate": 8.037455065363784e-07, "loss": 0.4821, "step": 12000 }, { "epoch": 1.74, "grad_norm": 6.797524452209473, "learning_rate": 8.035903045374642e-07, "loss": 0.5534, "step": 12001 }, { "epoch": 1.74, "grad_norm": 6.8017988204956055, "learning_rate": 8.034351074592754e-07, "loss": 0.6031, "step": 12002 }, { "epoch": 1.74, "grad_norm": 6.599775791168213, "learning_rate": 8.032799153057011e-07, "loss": 0.5995, "step": 12003 }, { "epoch": 1.74, "grad_norm": 6.775115489959717, "learning_rate": 8.031247280806283e-07, "loss": 0.6198, "step": 12004 }, { "epoch": 1.74, "grad_norm": 6.430541515350342, "learning_rate": 8.029695457879461e-07, "loss": 0.5489, "step": 12005 }, { "epoch": 1.74, "grad_norm": 6.943136215209961, "learning_rate": 8.028143684315413e-07, "loss": 0.5271, "step": 12006 }, { "epoch": 1.74, "grad_norm": 6.263211727142334, "learning_rate": 8.026591960153023e-07, "loss": 0.5548, "step": 12007 }, { "epoch": 1.74, "grad_norm": 6.708940505981445, "learning_rate": 8.025040285431167e-07, "loss": 0.6065, "step": 12008 }, { "epoch": 1.74, "grad_norm": 6.279041767120361, "learning_rate": 8.023488660188713e-07, "loss": 0.511, "step": 12009 }, { "epoch": 1.74, "grad_norm": 6.048872470855713, "learning_rate": 8.021937084464545e-07, "loss": 0.5215, "step": 12010 }, { "epoch": 1.74, "grad_norm": 6.82802677154541, "learning_rate": 8.020385558297524e-07, "loss": 0.5129, "step": 12011 }, { "epoch": 1.74, "grad_norm": 7.045236110687256, "learning_rate": 8.018834081726531e-07, "loss": 0.6406, "step": 12012 }, { "epoch": 1.74, "grad_norm": 5.928106784820557, "learning_rate": 8.017282654790428e-07, "loss": 0.5219, "step": 12013 }, { "epoch": 1.74, "grad_norm": 7.252013683319092, "learning_rate": 8.01573127752809e-07, "loss": 0.5894, "step": 12014 }, { "epoch": 1.74, "grad_norm": 6.523187637329102, "learning_rate": 8.014179949978375e-07, "loss": 0.5048, "step": 12015 }, { "epoch": 1.74, "grad_norm": 8.564708709716797, "learning_rate": 8.01262867218016e-07, "loss": 0.6517, "step": 12016 }, { "epoch": 1.74, "grad_norm": 5.943832874298096, "learning_rate": 8.011077444172301e-07, "loss": 0.4981, "step": 12017 }, { "epoch": 1.74, "grad_norm": 6.521557331085205, "learning_rate": 8.009526265993668e-07, "loss": 0.5061, "step": 12018 }, { "epoch": 1.74, "grad_norm": 6.978695869445801, "learning_rate": 8.007975137683116e-07, "loss": 0.5665, "step": 12019 }, { "epoch": 1.74, "grad_norm": 6.646485328674316, "learning_rate": 8.006424059279515e-07, "loss": 0.5242, "step": 12020 }, { "epoch": 1.74, "grad_norm": 6.906246185302734, "learning_rate": 8.004873030821717e-07, "loss": 0.4691, "step": 12021 }, { "epoch": 1.74, "grad_norm": 6.5264892578125, "learning_rate": 8.003322052348587e-07, "loss": 0.4589, "step": 12022 }, { "epoch": 1.74, "grad_norm": 6.615780353546143, "learning_rate": 8.001771123898973e-07, "loss": 0.4998, "step": 12023 }, { "epoch": 1.74, "grad_norm": 7.200344085693359, "learning_rate": 8.000220245511742e-07, "loss": 0.5274, "step": 12024 }, { "epoch": 1.74, "grad_norm": 6.420661449432373, "learning_rate": 7.998669417225738e-07, "loss": 0.4811, "step": 12025 }, { "epoch": 1.74, "grad_norm": 6.63258695602417, "learning_rate": 7.997118639079825e-07, "loss": 0.5485, "step": 12026 }, { "epoch": 1.75, "grad_norm": 6.381819725036621, "learning_rate": 7.995567911112848e-07, "loss": 0.4863, "step": 12027 }, { "epoch": 1.75, "grad_norm": 6.697355270385742, "learning_rate": 7.99401723336366e-07, "loss": 0.5499, "step": 12028 }, { "epoch": 1.75, "grad_norm": 7.234972953796387, "learning_rate": 7.992466605871111e-07, "loss": 0.5738, "step": 12029 }, { "epoch": 1.75, "grad_norm": 6.468092918395996, "learning_rate": 7.990916028674052e-07, "loss": 0.5461, "step": 12030 }, { "epoch": 1.75, "grad_norm": 6.723159313201904, "learning_rate": 7.989365501811323e-07, "loss": 0.5159, "step": 12031 }, { "epoch": 1.75, "grad_norm": 6.6666741371154785, "learning_rate": 7.987815025321774e-07, "loss": 0.5283, "step": 12032 }, { "epoch": 1.75, "grad_norm": 7.2380523681640625, "learning_rate": 7.986264599244253e-07, "loss": 0.5806, "step": 12033 }, { "epoch": 1.75, "grad_norm": 6.389312744140625, "learning_rate": 7.9847142236176e-07, "loss": 0.4834, "step": 12034 }, { "epoch": 1.75, "grad_norm": 6.855817794799805, "learning_rate": 7.983163898480659e-07, "loss": 0.632, "step": 12035 }, { "epoch": 1.75, "grad_norm": 6.364849090576172, "learning_rate": 7.981613623872267e-07, "loss": 0.5514, "step": 12036 }, { "epoch": 1.75, "grad_norm": 6.93255615234375, "learning_rate": 7.980063399831268e-07, "loss": 0.569, "step": 12037 }, { "epoch": 1.75, "grad_norm": 6.346197128295898, "learning_rate": 7.978513226396496e-07, "loss": 0.5042, "step": 12038 }, { "epoch": 1.75, "grad_norm": 6.735973358154297, "learning_rate": 7.976963103606794e-07, "loss": 0.5092, "step": 12039 }, { "epoch": 1.75, "grad_norm": 5.9874138832092285, "learning_rate": 7.97541303150099e-07, "loss": 0.5247, "step": 12040 }, { "epoch": 1.75, "grad_norm": 6.47799825668335, "learning_rate": 7.973863010117928e-07, "loss": 0.4661, "step": 12041 }, { "epoch": 1.75, "grad_norm": 7.1237006187438965, "learning_rate": 7.972313039496432e-07, "loss": 0.5366, "step": 12042 }, { "epoch": 1.75, "grad_norm": 6.168123245239258, "learning_rate": 7.970763119675342e-07, "loss": 0.4821, "step": 12043 }, { "epoch": 1.75, "grad_norm": 6.971748352050781, "learning_rate": 7.969213250693481e-07, "loss": 0.4844, "step": 12044 }, { "epoch": 1.75, "grad_norm": 6.15774393081665, "learning_rate": 7.967663432589685e-07, "loss": 0.5078, "step": 12045 }, { "epoch": 1.75, "grad_norm": 6.659990310668945, "learning_rate": 7.966113665402776e-07, "loss": 0.5825, "step": 12046 }, { "epoch": 1.75, "grad_norm": 6.790210247039795, "learning_rate": 7.96456394917159e-07, "loss": 0.5087, "step": 12047 }, { "epoch": 1.75, "grad_norm": 6.149840831756592, "learning_rate": 7.963014283934943e-07, "loss": 0.5141, "step": 12048 }, { "epoch": 1.75, "grad_norm": 7.046772480010986, "learning_rate": 7.961464669731663e-07, "loss": 0.537, "step": 12049 }, { "epoch": 1.75, "grad_norm": 6.631532669067383, "learning_rate": 7.959915106600575e-07, "loss": 0.5685, "step": 12050 }, { "epoch": 1.75, "grad_norm": 6.679208755493164, "learning_rate": 7.9583655945805e-07, "loss": 0.5073, "step": 12051 }, { "epoch": 1.75, "grad_norm": 6.261279106140137, "learning_rate": 7.956816133710256e-07, "loss": 0.5267, "step": 12052 }, { "epoch": 1.75, "grad_norm": 6.220621109008789, "learning_rate": 7.955266724028667e-07, "loss": 0.5346, "step": 12053 }, { "epoch": 1.75, "grad_norm": 6.418439865112305, "learning_rate": 7.953717365574542e-07, "loss": 0.5489, "step": 12054 }, { "epoch": 1.75, "grad_norm": 6.457374572753906, "learning_rate": 7.952168058386709e-07, "loss": 0.5469, "step": 12055 }, { "epoch": 1.75, "grad_norm": 6.623414039611816, "learning_rate": 7.950618802503976e-07, "loss": 0.5369, "step": 12056 }, { "epoch": 1.75, "grad_norm": 6.933554649353027, "learning_rate": 7.949069597965161e-07, "loss": 0.5141, "step": 12057 }, { "epoch": 1.75, "grad_norm": 6.969639301300049, "learning_rate": 7.94752044480907e-07, "loss": 0.56, "step": 12058 }, { "epoch": 1.75, "grad_norm": 8.565661430358887, "learning_rate": 7.945971343074519e-07, "loss": 0.5416, "step": 12059 }, { "epoch": 1.75, "grad_norm": 6.547364234924316, "learning_rate": 7.944422292800323e-07, "loss": 0.5566, "step": 12060 }, { "epoch": 1.75, "grad_norm": 6.517023086547852, "learning_rate": 7.942873294025285e-07, "loss": 0.498, "step": 12061 }, { "epoch": 1.75, "grad_norm": 6.387355327606201, "learning_rate": 7.941324346788214e-07, "loss": 0.5414, "step": 12062 }, { "epoch": 1.75, "grad_norm": 6.172355651855469, "learning_rate": 7.939775451127917e-07, "loss": 0.5394, "step": 12063 }, { "epoch": 1.75, "grad_norm": 6.596537113189697, "learning_rate": 7.9382266070832e-07, "loss": 0.4436, "step": 12064 }, { "epoch": 1.75, "grad_norm": 6.9112653732299805, "learning_rate": 7.936677814692861e-07, "loss": 0.5361, "step": 12065 }, { "epoch": 1.75, "grad_norm": 6.840230464935303, "learning_rate": 7.935129073995712e-07, "loss": 0.5248, "step": 12066 }, { "epoch": 1.75, "grad_norm": 5.934342861175537, "learning_rate": 7.933580385030544e-07, "loss": 0.5522, "step": 12067 }, { "epoch": 1.75, "grad_norm": 6.08225679397583, "learning_rate": 7.932031747836165e-07, "loss": 0.5025, "step": 12068 }, { "epoch": 1.75, "grad_norm": 6.198935031890869, "learning_rate": 7.930483162451371e-07, "loss": 0.5409, "step": 12069 }, { "epoch": 1.75, "grad_norm": 6.658071517944336, "learning_rate": 7.928934628914958e-07, "loss": 0.5418, "step": 12070 }, { "epoch": 1.75, "grad_norm": 6.35699462890625, "learning_rate": 7.92738614726572e-07, "loss": 0.5596, "step": 12071 }, { "epoch": 1.75, "grad_norm": 5.949528694152832, "learning_rate": 7.925837717542458e-07, "loss": 0.5734, "step": 12072 }, { "epoch": 1.75, "grad_norm": 6.5199761390686035, "learning_rate": 7.92428933978396e-07, "loss": 0.5533, "step": 12073 }, { "epoch": 1.75, "grad_norm": 6.984462261199951, "learning_rate": 7.922741014029022e-07, "loss": 0.5368, "step": 12074 }, { "epoch": 1.75, "grad_norm": 6.829431533813477, "learning_rate": 7.921192740316428e-07, "loss": 0.6131, "step": 12075 }, { "epoch": 1.75, "grad_norm": 6.5762224197387695, "learning_rate": 7.919644518684978e-07, "loss": 0.5052, "step": 12076 }, { "epoch": 1.75, "grad_norm": 7.381744384765625, "learning_rate": 7.918096349173451e-07, "loss": 0.5609, "step": 12077 }, { "epoch": 1.75, "grad_norm": 6.146329402923584, "learning_rate": 7.91654823182064e-07, "loss": 0.5331, "step": 12078 }, { "epoch": 1.75, "grad_norm": 5.962604522705078, "learning_rate": 7.915000166665324e-07, "loss": 0.5464, "step": 12079 }, { "epoch": 1.75, "grad_norm": 6.427567481994629, "learning_rate": 7.913452153746294e-07, "loss": 0.5792, "step": 12080 }, { "epoch": 1.75, "grad_norm": 6.516001224517822, "learning_rate": 7.911904193102326e-07, "loss": 0.4939, "step": 12081 }, { "epoch": 1.75, "grad_norm": 5.698641777038574, "learning_rate": 7.91035628477221e-07, "loss": 0.4824, "step": 12082 }, { "epoch": 1.75, "grad_norm": 7.111538887023926, "learning_rate": 7.90880842879472e-07, "loss": 0.545, "step": 12083 }, { "epoch": 1.75, "grad_norm": 6.590331554412842, "learning_rate": 7.907260625208636e-07, "loss": 0.5855, "step": 12084 }, { "epoch": 1.75, "grad_norm": 6.114386081695557, "learning_rate": 7.90571287405274e-07, "loss": 0.4798, "step": 12085 }, { "epoch": 1.75, "grad_norm": 6.951726913452148, "learning_rate": 7.9041651753658e-07, "loss": 0.6094, "step": 12086 }, { "epoch": 1.75, "grad_norm": 7.334939479827881, "learning_rate": 7.902617529186601e-07, "loss": 0.5254, "step": 12087 }, { "epoch": 1.75, "grad_norm": 5.796732425689697, "learning_rate": 7.901069935553907e-07, "loss": 0.4854, "step": 12088 }, { "epoch": 1.75, "grad_norm": 6.634701728820801, "learning_rate": 7.899522394506501e-07, "loss": 0.5233, "step": 12089 }, { "epoch": 1.75, "grad_norm": 6.38083028793335, "learning_rate": 7.897974906083146e-07, "loss": 0.5263, "step": 12090 }, { "epoch": 1.75, "grad_norm": 6.780202388763428, "learning_rate": 7.896427470322617e-07, "loss": 0.5127, "step": 12091 }, { "epoch": 1.75, "grad_norm": 7.026352882385254, "learning_rate": 7.894880087263674e-07, "loss": 0.5557, "step": 12092 }, { "epoch": 1.75, "grad_norm": 6.670849323272705, "learning_rate": 7.893332756945095e-07, "loss": 0.5636, "step": 12093 }, { "epoch": 1.75, "grad_norm": 5.955441474914551, "learning_rate": 7.891785479405637e-07, "loss": 0.5794, "step": 12094 }, { "epoch": 1.75, "grad_norm": 7.026679515838623, "learning_rate": 7.890238254684072e-07, "loss": 0.5842, "step": 12095 }, { "epoch": 1.76, "grad_norm": 5.723817348480225, "learning_rate": 7.888691082819158e-07, "loss": 0.4942, "step": 12096 }, { "epoch": 1.76, "grad_norm": 7.036238193511963, "learning_rate": 7.887143963849658e-07, "loss": 0.4809, "step": 12097 }, { "epoch": 1.76, "grad_norm": 7.078674793243408, "learning_rate": 7.885596897814333e-07, "loss": 0.5006, "step": 12098 }, { "epoch": 1.76, "grad_norm": 6.574493408203125, "learning_rate": 7.884049884751943e-07, "loss": 0.5504, "step": 12099 }, { "epoch": 1.76, "grad_norm": 6.699593544006348, "learning_rate": 7.882502924701242e-07, "loss": 0.6535, "step": 12100 }, { "epoch": 1.76, "grad_norm": 6.648402690887451, "learning_rate": 7.880956017700994e-07, "loss": 0.5649, "step": 12101 }, { "epoch": 1.76, "grad_norm": 6.17400598526001, "learning_rate": 7.879409163789946e-07, "loss": 0.5321, "step": 12102 }, { "epoch": 1.76, "grad_norm": 6.891854286193848, "learning_rate": 7.877862363006859e-07, "loss": 0.5259, "step": 12103 }, { "epoch": 1.76, "grad_norm": 7.30604887008667, "learning_rate": 7.876315615390481e-07, "loss": 0.5509, "step": 12104 }, { "epoch": 1.76, "grad_norm": 6.019420146942139, "learning_rate": 7.874768920979566e-07, "loss": 0.5328, "step": 12105 }, { "epoch": 1.76, "grad_norm": 6.722374439239502, "learning_rate": 7.87322227981286e-07, "loss": 0.6094, "step": 12106 }, { "epoch": 1.76, "grad_norm": 7.132447719573975, "learning_rate": 7.871675691929117e-07, "loss": 0.512, "step": 12107 }, { "epoch": 1.76, "grad_norm": 7.570271968841553, "learning_rate": 7.870129157367078e-07, "loss": 0.538, "step": 12108 }, { "epoch": 1.76, "grad_norm": 6.914736747741699, "learning_rate": 7.868582676165496e-07, "loss": 0.5622, "step": 12109 }, { "epoch": 1.76, "grad_norm": 6.6369500160217285, "learning_rate": 7.867036248363107e-07, "loss": 0.5318, "step": 12110 }, { "epoch": 1.76, "grad_norm": 6.902735710144043, "learning_rate": 7.865489873998662e-07, "loss": 0.5052, "step": 12111 }, { "epoch": 1.76, "grad_norm": 6.519135475158691, "learning_rate": 7.863943553110902e-07, "loss": 0.5198, "step": 12112 }, { "epoch": 1.76, "grad_norm": 6.309717178344727, "learning_rate": 7.862397285738561e-07, "loss": 0.5407, "step": 12113 }, { "epoch": 1.76, "grad_norm": 6.971297264099121, "learning_rate": 7.860851071920387e-07, "loss": 0.5805, "step": 12114 }, { "epoch": 1.76, "grad_norm": 6.743807792663574, "learning_rate": 7.85930491169511e-07, "loss": 0.5329, "step": 12115 }, { "epoch": 1.76, "grad_norm": 5.845397472381592, "learning_rate": 7.857758805101474e-07, "loss": 0.4814, "step": 12116 }, { "epoch": 1.76, "grad_norm": 6.672336578369141, "learning_rate": 7.856212752178208e-07, "loss": 0.5291, "step": 12117 }, { "epoch": 1.76, "grad_norm": 6.1862382888793945, "learning_rate": 7.854666752964048e-07, "loss": 0.4761, "step": 12118 }, { "epoch": 1.76, "grad_norm": 6.669535160064697, "learning_rate": 7.853120807497728e-07, "loss": 0.471, "step": 12119 }, { "epoch": 1.76, "grad_norm": 6.68248987197876, "learning_rate": 7.851574915817979e-07, "loss": 0.5219, "step": 12120 }, { "epoch": 1.76, "grad_norm": 6.966475009918213, "learning_rate": 7.850029077963526e-07, "loss": 0.5059, "step": 12121 }, { "epoch": 1.76, "grad_norm": 6.27947998046875, "learning_rate": 7.848483293973105e-07, "loss": 0.5339, "step": 12122 }, { "epoch": 1.76, "grad_norm": 6.907199382781982, "learning_rate": 7.846937563885434e-07, "loss": 0.5206, "step": 12123 }, { "epoch": 1.76, "grad_norm": 6.694535732269287, "learning_rate": 7.84539188773925e-07, "loss": 0.6127, "step": 12124 }, { "epoch": 1.76, "grad_norm": 7.130541801452637, "learning_rate": 7.843846265573268e-07, "loss": 0.4377, "step": 12125 }, { "epoch": 1.76, "grad_norm": 7.499060153961182, "learning_rate": 7.842300697426217e-07, "loss": 0.5504, "step": 12126 }, { "epoch": 1.76, "grad_norm": 6.200793743133545, "learning_rate": 7.840755183336812e-07, "loss": 0.5249, "step": 12127 }, { "epoch": 1.76, "grad_norm": 6.019963264465332, "learning_rate": 7.839209723343782e-07, "loss": 0.4921, "step": 12128 }, { "epoch": 1.76, "grad_norm": 6.875306606292725, "learning_rate": 7.837664317485836e-07, "loss": 0.5437, "step": 12129 }, { "epoch": 1.76, "grad_norm": 7.5412702560424805, "learning_rate": 7.836118965801702e-07, "loss": 0.5438, "step": 12130 }, { "epoch": 1.76, "grad_norm": 6.454720973968506, "learning_rate": 7.834573668330088e-07, "loss": 0.5246, "step": 12131 }, { "epoch": 1.76, "grad_norm": 6.294787883758545, "learning_rate": 7.833028425109716e-07, "loss": 0.5344, "step": 12132 }, { "epoch": 1.76, "grad_norm": 6.208508491516113, "learning_rate": 7.831483236179293e-07, "loss": 0.5102, "step": 12133 }, { "epoch": 1.76, "grad_norm": 6.113734245300293, "learning_rate": 7.829938101577537e-07, "loss": 0.4958, "step": 12134 }, { "epoch": 1.76, "grad_norm": 6.259418487548828, "learning_rate": 7.828393021343152e-07, "loss": 0.5527, "step": 12135 }, { "epoch": 1.76, "grad_norm": 6.76164436340332, "learning_rate": 7.826847995514853e-07, "loss": 0.5881, "step": 12136 }, { "epoch": 1.76, "grad_norm": 6.739274501800537, "learning_rate": 7.825303024131344e-07, "loss": 0.5103, "step": 12137 }, { "epoch": 1.76, "grad_norm": 6.291569709777832, "learning_rate": 7.823758107231338e-07, "loss": 0.5489, "step": 12138 }, { "epoch": 1.76, "grad_norm": 6.692522048950195, "learning_rate": 7.822213244853538e-07, "loss": 0.5998, "step": 12139 }, { "epoch": 1.76, "grad_norm": 6.444553852081299, "learning_rate": 7.82066843703664e-07, "loss": 0.5468, "step": 12140 }, { "epoch": 1.76, "grad_norm": 6.357206344604492, "learning_rate": 7.81912368381936e-07, "loss": 0.4934, "step": 12141 }, { "epoch": 1.76, "grad_norm": 6.023809909820557, "learning_rate": 7.817578985240388e-07, "loss": 0.4922, "step": 12142 }, { "epoch": 1.76, "grad_norm": 6.733238220214844, "learning_rate": 7.816034341338431e-07, "loss": 0.5213, "step": 12143 }, { "epoch": 1.76, "grad_norm": 6.482909679412842, "learning_rate": 7.814489752152182e-07, "loss": 0.5866, "step": 12144 }, { "epoch": 1.76, "grad_norm": 6.369799613952637, "learning_rate": 7.812945217720346e-07, "loss": 0.5606, "step": 12145 }, { "epoch": 1.76, "grad_norm": 6.69529390335083, "learning_rate": 7.81140073808161e-07, "loss": 0.5229, "step": 12146 }, { "epoch": 1.76, "grad_norm": 6.990809440612793, "learning_rate": 7.809856313274677e-07, "loss": 0.5671, "step": 12147 }, { "epoch": 1.76, "grad_norm": 7.528623104095459, "learning_rate": 7.80831194333823e-07, "loss": 0.4919, "step": 12148 }, { "epoch": 1.76, "grad_norm": 6.566235542297363, "learning_rate": 7.80676762831097e-07, "loss": 0.5035, "step": 12149 }, { "epoch": 1.76, "grad_norm": 5.6296491622924805, "learning_rate": 7.80522336823158e-07, "loss": 0.4927, "step": 12150 }, { "epoch": 1.76, "grad_norm": 6.996448040008545, "learning_rate": 7.803679163138754e-07, "loss": 0.5332, "step": 12151 }, { "epoch": 1.76, "grad_norm": 6.7924323081970215, "learning_rate": 7.802135013071179e-07, "loss": 0.5387, "step": 12152 }, { "epoch": 1.76, "grad_norm": 7.130888938903809, "learning_rate": 7.800590918067539e-07, "loss": 0.5273, "step": 12153 }, { "epoch": 1.76, "grad_norm": 7.013000011444092, "learning_rate": 7.79904687816652e-07, "loss": 0.5208, "step": 12154 }, { "epoch": 1.76, "grad_norm": 6.592303276062012, "learning_rate": 7.797502893406807e-07, "loss": 0.5194, "step": 12155 }, { "epoch": 1.76, "grad_norm": 5.822054862976074, "learning_rate": 7.795958963827077e-07, "loss": 0.4684, "step": 12156 }, { "epoch": 1.76, "grad_norm": 6.245602130889893, "learning_rate": 7.794415089466018e-07, "loss": 0.5676, "step": 12157 }, { "epoch": 1.76, "grad_norm": 6.683164119720459, "learning_rate": 7.7928712703623e-07, "loss": 0.5072, "step": 12158 }, { "epoch": 1.76, "grad_norm": 7.734234809875488, "learning_rate": 7.79132750655461e-07, "loss": 0.5288, "step": 12159 }, { "epoch": 1.76, "grad_norm": 6.321449279785156, "learning_rate": 7.789783798081621e-07, "loss": 0.5461, "step": 12160 }, { "epoch": 1.76, "grad_norm": 6.81411075592041, "learning_rate": 7.788240144982008e-07, "loss": 0.5337, "step": 12161 }, { "epoch": 1.76, "grad_norm": 6.1660380363464355, "learning_rate": 7.78669654729444e-07, "loss": 0.523, "step": 12162 }, { "epoch": 1.76, "grad_norm": 6.121611595153809, "learning_rate": 7.785153005057594e-07, "loss": 0.4414, "step": 12163 }, { "epoch": 1.76, "grad_norm": 6.848551273345947, "learning_rate": 7.783609518310145e-07, "loss": 0.5372, "step": 12164 }, { "epoch": 1.77, "grad_norm": 6.433982849121094, "learning_rate": 7.782066087090757e-07, "loss": 0.5313, "step": 12165 }, { "epoch": 1.77, "grad_norm": 6.745330810546875, "learning_rate": 7.780522711438099e-07, "loss": 0.4959, "step": 12166 }, { "epoch": 1.77, "grad_norm": 7.285367965698242, "learning_rate": 7.778979391390837e-07, "loss": 0.5309, "step": 12167 }, { "epoch": 1.77, "grad_norm": 6.07822847366333, "learning_rate": 7.777436126987641e-07, "loss": 0.5585, "step": 12168 }, { "epoch": 1.77, "grad_norm": 6.992798328399658, "learning_rate": 7.775892918267167e-07, "loss": 0.4809, "step": 12169 }, { "epoch": 1.77, "grad_norm": 6.947946548461914, "learning_rate": 7.774349765268086e-07, "loss": 0.5177, "step": 12170 }, { "epoch": 1.77, "grad_norm": 7.659152030944824, "learning_rate": 7.772806668029052e-07, "loss": 0.5414, "step": 12171 }, { "epoch": 1.77, "grad_norm": 6.7116312980651855, "learning_rate": 7.771263626588731e-07, "loss": 0.5419, "step": 12172 }, { "epoch": 1.77, "grad_norm": 6.562631607055664, "learning_rate": 7.769720640985777e-07, "loss": 0.5047, "step": 12173 }, { "epoch": 1.77, "grad_norm": 6.675718784332275, "learning_rate": 7.768177711258852e-07, "loss": 0.5815, "step": 12174 }, { "epoch": 1.77, "grad_norm": 7.12170934677124, "learning_rate": 7.766634837446601e-07, "loss": 0.5293, "step": 12175 }, { "epoch": 1.77, "grad_norm": 6.814412593841553, "learning_rate": 7.765092019587692e-07, "loss": 0.5887, "step": 12176 }, { "epoch": 1.77, "grad_norm": 7.401773929595947, "learning_rate": 7.763549257720767e-07, "loss": 0.6006, "step": 12177 }, { "epoch": 1.77, "grad_norm": 6.818055629730225, "learning_rate": 7.762006551884484e-07, "loss": 0.5844, "step": 12178 }, { "epoch": 1.77, "grad_norm": 6.633454322814941, "learning_rate": 7.760463902117488e-07, "loss": 0.5961, "step": 12179 }, { "epoch": 1.77, "grad_norm": 6.038140773773193, "learning_rate": 7.758921308458433e-07, "loss": 0.6164, "step": 12180 }, { "epoch": 1.77, "grad_norm": 8.234827041625977, "learning_rate": 7.757378770945961e-07, "loss": 0.4977, "step": 12181 }, { "epoch": 1.77, "grad_norm": 6.755438327789307, "learning_rate": 7.755836289618723e-07, "loss": 0.5957, "step": 12182 }, { "epoch": 1.77, "grad_norm": 6.347311019897461, "learning_rate": 7.754293864515357e-07, "loss": 0.5706, "step": 12183 }, { "epoch": 1.77, "grad_norm": 6.525112152099609, "learning_rate": 7.752751495674512e-07, "loss": 0.5181, "step": 12184 }, { "epoch": 1.77, "grad_norm": 6.195094585418701, "learning_rate": 7.751209183134823e-07, "loss": 0.5516, "step": 12185 }, { "epoch": 1.77, "grad_norm": 6.882475852966309, "learning_rate": 7.749666926934938e-07, "loss": 0.5326, "step": 12186 }, { "epoch": 1.77, "grad_norm": 6.467942237854004, "learning_rate": 7.748124727113488e-07, "loss": 0.5234, "step": 12187 }, { "epoch": 1.77, "grad_norm": 5.559238433837891, "learning_rate": 7.746582583709118e-07, "loss": 0.4793, "step": 12188 }, { "epoch": 1.77, "grad_norm": 6.707372665405273, "learning_rate": 7.745040496760457e-07, "loss": 0.4835, "step": 12189 }, { "epoch": 1.77, "grad_norm": 6.405599594116211, "learning_rate": 7.743498466306142e-07, "loss": 0.5282, "step": 12190 }, { "epoch": 1.77, "grad_norm": 5.7561163902282715, "learning_rate": 7.74195649238481e-07, "loss": 0.5513, "step": 12191 }, { "epoch": 1.77, "grad_norm": 6.514073371887207, "learning_rate": 7.740414575035084e-07, "loss": 0.4798, "step": 12192 }, { "epoch": 1.77, "grad_norm": 7.120181560516357, "learning_rate": 7.738872714295606e-07, "loss": 0.5239, "step": 12193 }, { "epoch": 1.77, "grad_norm": 6.463249683380127, "learning_rate": 7.737330910204993e-07, "loss": 0.4786, "step": 12194 }, { "epoch": 1.77, "grad_norm": 7.297149181365967, "learning_rate": 7.735789162801882e-07, "loss": 0.529, "step": 12195 }, { "epoch": 1.77, "grad_norm": 6.513862133026123, "learning_rate": 7.73424747212489e-07, "loss": 0.518, "step": 12196 }, { "epoch": 1.77, "grad_norm": 6.372696876525879, "learning_rate": 7.73270583821265e-07, "loss": 0.5197, "step": 12197 }, { "epoch": 1.77, "grad_norm": 6.439490795135498, "learning_rate": 7.731164261103776e-07, "loss": 0.5151, "step": 12198 }, { "epoch": 1.77, "grad_norm": 6.500616550445557, "learning_rate": 7.7296227408369e-07, "loss": 0.5745, "step": 12199 }, { "epoch": 1.77, "grad_norm": 6.640746593475342, "learning_rate": 7.728081277450634e-07, "loss": 0.4576, "step": 12200 }, { "epoch": 1.77, "grad_norm": 6.287537097930908, "learning_rate": 7.726539870983604e-07, "loss": 0.4581, "step": 12201 }, { "epoch": 1.77, "grad_norm": 6.936354637145996, "learning_rate": 7.724998521474421e-07, "loss": 0.5471, "step": 12202 }, { "epoch": 1.77, "grad_norm": 6.3369879722595215, "learning_rate": 7.723457228961705e-07, "loss": 0.4821, "step": 12203 }, { "epoch": 1.77, "grad_norm": 6.088594913482666, "learning_rate": 7.721915993484066e-07, "loss": 0.4995, "step": 12204 }, { "epoch": 1.77, "grad_norm": 6.581705093383789, "learning_rate": 7.720374815080124e-07, "loss": 0.5367, "step": 12205 }, { "epoch": 1.77, "grad_norm": 6.782190799713135, "learning_rate": 7.718833693788483e-07, "loss": 0.4812, "step": 12206 }, { "epoch": 1.77, "grad_norm": 6.715167999267578, "learning_rate": 7.717292629647762e-07, "loss": 0.4385, "step": 12207 }, { "epoch": 1.77, "grad_norm": 6.718217372894287, "learning_rate": 7.715751622696562e-07, "loss": 0.471, "step": 12208 }, { "epoch": 1.77, "grad_norm": 7.386861801147461, "learning_rate": 7.714210672973493e-07, "loss": 0.4947, "step": 12209 }, { "epoch": 1.77, "grad_norm": 7.247901916503906, "learning_rate": 7.712669780517162e-07, "loss": 0.521, "step": 12210 }, { "epoch": 1.77, "grad_norm": 6.983191013336182, "learning_rate": 7.711128945366177e-07, "loss": 0.4838, "step": 12211 }, { "epoch": 1.77, "grad_norm": 6.160953998565674, "learning_rate": 7.709588167559131e-07, "loss": 0.4593, "step": 12212 }, { "epoch": 1.77, "grad_norm": 6.501087188720703, "learning_rate": 7.708047447134638e-07, "loss": 0.6083, "step": 12213 }, { "epoch": 1.77, "grad_norm": 6.371318340301514, "learning_rate": 7.706506784131287e-07, "loss": 0.5276, "step": 12214 }, { "epoch": 1.77, "grad_norm": 6.217432975769043, "learning_rate": 7.704966178587684e-07, "loss": 0.5067, "step": 12215 }, { "epoch": 1.77, "grad_norm": 6.997569561004639, "learning_rate": 7.703425630542426e-07, "loss": 0.5417, "step": 12216 }, { "epoch": 1.77, "grad_norm": 6.014520645141602, "learning_rate": 7.701885140034104e-07, "loss": 0.4632, "step": 12217 }, { "epoch": 1.77, "grad_norm": 7.4419755935668945, "learning_rate": 7.700344707101319e-07, "loss": 0.5378, "step": 12218 }, { "epoch": 1.77, "grad_norm": 6.289628505706787, "learning_rate": 7.698804331782658e-07, "loss": 0.5338, "step": 12219 }, { "epoch": 1.77, "grad_norm": 6.262538909912109, "learning_rate": 7.697264014116718e-07, "loss": 0.5005, "step": 12220 }, { "epoch": 1.77, "grad_norm": 6.595695495605469, "learning_rate": 7.695723754142086e-07, "loss": 0.4687, "step": 12221 }, { "epoch": 1.77, "grad_norm": 6.7512736320495605, "learning_rate": 7.69418355189735e-07, "loss": 0.5907, "step": 12222 }, { "epoch": 1.77, "grad_norm": 7.176580429077148, "learning_rate": 7.6926434074211e-07, "loss": 0.5347, "step": 12223 }, { "epoch": 1.77, "grad_norm": 6.329965591430664, "learning_rate": 7.691103320751921e-07, "loss": 0.4915, "step": 12224 }, { "epoch": 1.77, "grad_norm": 6.502071857452393, "learning_rate": 7.689563291928394e-07, "loss": 0.5632, "step": 12225 }, { "epoch": 1.77, "grad_norm": 6.544711589813232, "learning_rate": 7.688023320989107e-07, "loss": 0.5145, "step": 12226 }, { "epoch": 1.77, "grad_norm": 6.698558330535889, "learning_rate": 7.686483407972636e-07, "loss": 0.5569, "step": 12227 }, { "epoch": 1.77, "grad_norm": 6.6963372230529785, "learning_rate": 7.684943552917569e-07, "loss": 0.5442, "step": 12228 }, { "epoch": 1.77, "grad_norm": 6.257212162017822, "learning_rate": 7.683403755862476e-07, "loss": 0.5518, "step": 12229 }, { "epoch": 1.77, "grad_norm": 6.773543357849121, "learning_rate": 7.681864016845942e-07, "loss": 0.5822, "step": 12230 }, { "epoch": 1.77, "grad_norm": 6.692981719970703, "learning_rate": 7.680324335906531e-07, "loss": 0.6142, "step": 12231 }, { "epoch": 1.77, "grad_norm": 6.254517555236816, "learning_rate": 7.678784713082832e-07, "loss": 0.5379, "step": 12232 }, { "epoch": 1.77, "grad_norm": 6.888652801513672, "learning_rate": 7.677245148413405e-07, "loss": 0.5979, "step": 12233 }, { "epoch": 1.78, "grad_norm": 6.9829888343811035, "learning_rate": 7.675705641936832e-07, "loss": 0.4836, "step": 12234 }, { "epoch": 1.78, "grad_norm": 5.920987606048584, "learning_rate": 7.674166193691671e-07, "loss": 0.4934, "step": 12235 }, { "epoch": 1.78, "grad_norm": 6.676232814788818, "learning_rate": 7.672626803716503e-07, "loss": 0.5552, "step": 12236 }, { "epoch": 1.78, "grad_norm": 6.190250873565674, "learning_rate": 7.671087472049886e-07, "loss": 0.5783, "step": 12237 }, { "epoch": 1.78, "grad_norm": 6.907223701477051, "learning_rate": 7.669548198730391e-07, "loss": 0.4979, "step": 12238 }, { "epoch": 1.78, "grad_norm": 7.524201393127441, "learning_rate": 7.668008983796575e-07, "loss": 0.5545, "step": 12239 }, { "epoch": 1.78, "grad_norm": 6.360827922821045, "learning_rate": 7.66646982728701e-07, "loss": 0.5421, "step": 12240 }, { "epoch": 1.78, "grad_norm": 6.777196407318115, "learning_rate": 7.664930729240247e-07, "loss": 0.4941, "step": 12241 }, { "epoch": 1.78, "grad_norm": 5.906474590301514, "learning_rate": 7.663391689694852e-07, "loss": 0.4855, "step": 12242 }, { "epoch": 1.78, "grad_norm": 6.6622772216796875, "learning_rate": 7.661852708689383e-07, "loss": 0.5925, "step": 12243 }, { "epoch": 1.78, "grad_norm": 6.702294826507568, "learning_rate": 7.660313786262393e-07, "loss": 0.5209, "step": 12244 }, { "epoch": 1.78, "grad_norm": 7.360747814178467, "learning_rate": 7.658774922452442e-07, "loss": 0.6497, "step": 12245 }, { "epoch": 1.78, "grad_norm": 6.001851558685303, "learning_rate": 7.657236117298077e-07, "loss": 0.5327, "step": 12246 }, { "epoch": 1.78, "grad_norm": 6.342251777648926, "learning_rate": 7.655697370837859e-07, "loss": 0.4989, "step": 12247 }, { "epoch": 1.78, "grad_norm": 7.038645267486572, "learning_rate": 7.654158683110331e-07, "loss": 0.5583, "step": 12248 }, { "epoch": 1.78, "grad_norm": 5.759379863739014, "learning_rate": 7.65262005415405e-07, "loss": 0.4355, "step": 12249 }, { "epoch": 1.78, "grad_norm": 6.596144199371338, "learning_rate": 7.651081484007555e-07, "loss": 0.5981, "step": 12250 }, { "epoch": 1.78, "grad_norm": 6.795217990875244, "learning_rate": 7.649542972709402e-07, "loss": 0.5102, "step": 12251 }, { "epoch": 1.78, "grad_norm": 6.445479869842529, "learning_rate": 7.648004520298125e-07, "loss": 0.5857, "step": 12252 }, { "epoch": 1.78, "grad_norm": 7.7336578369140625, "learning_rate": 7.646466126812277e-07, "loss": 0.5442, "step": 12253 }, { "epoch": 1.78, "grad_norm": 7.278473377227783, "learning_rate": 7.644927792290392e-07, "loss": 0.5216, "step": 12254 }, { "epoch": 1.78, "grad_norm": 6.886351108551025, "learning_rate": 7.643389516771019e-07, "loss": 0.5125, "step": 12255 }, { "epoch": 1.78, "grad_norm": 7.039881706237793, "learning_rate": 7.641851300292691e-07, "loss": 0.5452, "step": 12256 }, { "epoch": 1.78, "grad_norm": 5.757345676422119, "learning_rate": 7.640313142893947e-07, "loss": 0.5309, "step": 12257 }, { "epoch": 1.78, "grad_norm": 6.124300003051758, "learning_rate": 7.638775044613323e-07, "loss": 0.4921, "step": 12258 }, { "epoch": 1.78, "grad_norm": 6.88641357421875, "learning_rate": 7.637237005489356e-07, "loss": 0.5374, "step": 12259 }, { "epoch": 1.78, "grad_norm": 6.690357208251953, "learning_rate": 7.635699025560571e-07, "loss": 0.5225, "step": 12260 }, { "epoch": 1.78, "grad_norm": 6.638484954833984, "learning_rate": 7.63416110486551e-07, "loss": 0.5076, "step": 12261 }, { "epoch": 1.78, "grad_norm": 7.058379650115967, "learning_rate": 7.632623243442695e-07, "loss": 0.5604, "step": 12262 }, { "epoch": 1.78, "grad_norm": 7.255815029144287, "learning_rate": 7.631085441330663e-07, "loss": 0.5707, "step": 12263 }, { "epoch": 1.78, "grad_norm": 6.517793655395508, "learning_rate": 7.629547698567933e-07, "loss": 0.4834, "step": 12264 }, { "epoch": 1.78, "grad_norm": 6.719098091125488, "learning_rate": 7.628010015193036e-07, "loss": 0.5566, "step": 12265 }, { "epoch": 1.78, "grad_norm": 6.0611138343811035, "learning_rate": 7.62647239124449e-07, "loss": 0.515, "step": 12266 }, { "epoch": 1.78, "grad_norm": 6.512388706207275, "learning_rate": 7.624934826760822e-07, "loss": 0.5632, "step": 12267 }, { "epoch": 1.78, "grad_norm": 7.412616729736328, "learning_rate": 7.623397321780556e-07, "loss": 0.4874, "step": 12268 }, { "epoch": 1.78, "grad_norm": 7.340063095092773, "learning_rate": 7.621859876342204e-07, "loss": 0.6348, "step": 12269 }, { "epoch": 1.78, "grad_norm": 6.877311706542969, "learning_rate": 7.620322490484296e-07, "loss": 0.5676, "step": 12270 }, { "epoch": 1.78, "grad_norm": 6.906912326812744, "learning_rate": 7.618785164245336e-07, "loss": 0.5458, "step": 12271 }, { "epoch": 1.78, "grad_norm": 6.209378242492676, "learning_rate": 7.617247897663849e-07, "loss": 0.5616, "step": 12272 }, { "epoch": 1.78, "grad_norm": 6.727180004119873, "learning_rate": 7.615710690778339e-07, "loss": 0.6287, "step": 12273 }, { "epoch": 1.78, "grad_norm": 6.090447425842285, "learning_rate": 7.614173543627327e-07, "loss": 0.5738, "step": 12274 }, { "epoch": 1.78, "grad_norm": 7.14206600189209, "learning_rate": 7.612636456249318e-07, "loss": 0.6231, "step": 12275 }, { "epoch": 1.78, "grad_norm": 7.120176792144775, "learning_rate": 7.611099428682825e-07, "loss": 0.5654, "step": 12276 }, { "epoch": 1.78, "grad_norm": 7.072881698608398, "learning_rate": 7.609562460966354e-07, "loss": 0.5405, "step": 12277 }, { "epoch": 1.78, "grad_norm": 7.840379238128662, "learning_rate": 7.608025553138413e-07, "loss": 0.5656, "step": 12278 }, { "epoch": 1.78, "grad_norm": 6.536252498626709, "learning_rate": 7.606488705237502e-07, "loss": 0.5175, "step": 12279 }, { "epoch": 1.78, "grad_norm": 7.131744861602783, "learning_rate": 7.604951917302131e-07, "loss": 0.5934, "step": 12280 }, { "epoch": 1.78, "grad_norm": 6.872917652130127, "learning_rate": 7.603415189370795e-07, "loss": 0.61, "step": 12281 }, { "epoch": 1.78, "grad_norm": 6.238345623016357, "learning_rate": 7.601878521482002e-07, "loss": 0.4667, "step": 12282 }, { "epoch": 1.78, "grad_norm": 6.1281418800354, "learning_rate": 7.60034191367424e-07, "loss": 0.5528, "step": 12283 }, { "epoch": 1.78, "grad_norm": 6.351052761077881, "learning_rate": 7.598805365986016e-07, "loss": 0.5477, "step": 12284 }, { "epoch": 1.78, "grad_norm": 5.586365222930908, "learning_rate": 7.597268878455821e-07, "loss": 0.5123, "step": 12285 }, { "epoch": 1.78, "grad_norm": 6.7047014236450195, "learning_rate": 7.595732451122153e-07, "loss": 0.4876, "step": 12286 }, { "epoch": 1.78, "grad_norm": 5.925094127655029, "learning_rate": 7.594196084023496e-07, "loss": 0.4858, "step": 12287 }, { "epoch": 1.78, "grad_norm": 6.6795806884765625, "learning_rate": 7.592659777198354e-07, "loss": 0.5679, "step": 12288 }, { "epoch": 1.78, "grad_norm": 6.6077680587768555, "learning_rate": 7.591123530685204e-07, "loss": 0.4915, "step": 12289 }, { "epoch": 1.78, "grad_norm": 6.499453067779541, "learning_rate": 7.589587344522544e-07, "loss": 0.4271, "step": 12290 }, { "epoch": 1.78, "grad_norm": 6.5434160232543945, "learning_rate": 7.588051218748852e-07, "loss": 0.5444, "step": 12291 }, { "epoch": 1.78, "grad_norm": 7.028133869171143, "learning_rate": 7.586515153402622e-07, "loss": 0.5427, "step": 12292 }, { "epoch": 1.78, "grad_norm": 6.917437553405762, "learning_rate": 7.584979148522332e-07, "loss": 0.5195, "step": 12293 }, { "epoch": 1.78, "grad_norm": 6.107861042022705, "learning_rate": 7.583443204146463e-07, "loss": 0.4734, "step": 12294 }, { "epoch": 1.78, "grad_norm": 7.073827266693115, "learning_rate": 7.581907320313502e-07, "loss": 0.5168, "step": 12295 }, { "epoch": 1.78, "grad_norm": 6.798583984375, "learning_rate": 7.58037149706192e-07, "loss": 0.4667, "step": 12296 }, { "epoch": 1.78, "grad_norm": 6.674002647399902, "learning_rate": 7.578835734430203e-07, "loss": 0.5271, "step": 12297 }, { "epoch": 1.78, "grad_norm": 6.773569107055664, "learning_rate": 7.577300032456821e-07, "loss": 0.47, "step": 12298 }, { "epoch": 1.78, "grad_norm": 6.277410984039307, "learning_rate": 7.575764391180251e-07, "loss": 0.5069, "step": 12299 }, { "epoch": 1.78, "grad_norm": 6.316583156585693, "learning_rate": 7.574228810638962e-07, "loss": 0.5979, "step": 12300 }, { "epoch": 1.78, "grad_norm": 6.822497844696045, "learning_rate": 7.572693290871432e-07, "loss": 0.5723, "step": 12301 }, { "epoch": 1.78, "grad_norm": 6.484381675720215, "learning_rate": 7.571157831916124e-07, "loss": 0.5067, "step": 12302 }, { "epoch": 1.79, "grad_norm": 6.366987228393555, "learning_rate": 7.569622433811515e-07, "loss": 0.5237, "step": 12303 }, { "epoch": 1.79, "grad_norm": 7.0691070556640625, "learning_rate": 7.568087096596062e-07, "loss": 0.5804, "step": 12304 }, { "epoch": 1.79, "grad_norm": 7.66662073135376, "learning_rate": 7.56655182030824e-07, "loss": 0.6689, "step": 12305 }, { "epoch": 1.79, "grad_norm": 6.702240943908691, "learning_rate": 7.565016604986506e-07, "loss": 0.5223, "step": 12306 }, { "epoch": 1.79, "grad_norm": 6.483266830444336, "learning_rate": 7.563481450669327e-07, "loss": 0.509, "step": 12307 }, { "epoch": 1.79, "grad_norm": 6.649682521820068, "learning_rate": 7.561946357395155e-07, "loss": 0.5823, "step": 12308 }, { "epoch": 1.79, "grad_norm": 6.072474002838135, "learning_rate": 7.560411325202462e-07, "loss": 0.529, "step": 12309 }, { "epoch": 1.79, "grad_norm": 7.066789627075195, "learning_rate": 7.558876354129695e-07, "loss": 0.5502, "step": 12310 }, { "epoch": 1.79, "grad_norm": 7.512253761291504, "learning_rate": 7.557341444215319e-07, "loss": 0.5825, "step": 12311 }, { "epoch": 1.79, "grad_norm": 5.9402689933776855, "learning_rate": 7.55580659549778e-07, "loss": 0.4787, "step": 12312 }, { "epoch": 1.79, "grad_norm": 6.509079456329346, "learning_rate": 7.554271808015538e-07, "loss": 0.5764, "step": 12313 }, { "epoch": 1.79, "grad_norm": 6.029970645904541, "learning_rate": 7.55273708180704e-07, "loss": 0.5936, "step": 12314 }, { "epoch": 1.79, "grad_norm": 6.7974138259887695, "learning_rate": 7.55120241691074e-07, "loss": 0.4958, "step": 12315 }, { "epoch": 1.79, "grad_norm": 6.758362293243408, "learning_rate": 7.549667813365082e-07, "loss": 0.574, "step": 12316 }, { "epoch": 1.79, "grad_norm": 6.503918647766113, "learning_rate": 7.548133271208519e-07, "loss": 0.5796, "step": 12317 }, { "epoch": 1.79, "grad_norm": 6.508408069610596, "learning_rate": 7.546598790479488e-07, "loss": 0.5762, "step": 12318 }, { "epoch": 1.79, "grad_norm": 6.170982360839844, "learning_rate": 7.545064371216441e-07, "loss": 0.5102, "step": 12319 }, { "epoch": 1.79, "grad_norm": 5.77006196975708, "learning_rate": 7.543530013457819e-07, "loss": 0.532, "step": 12320 }, { "epoch": 1.79, "grad_norm": 6.5127129554748535, "learning_rate": 7.541995717242058e-07, "loss": 0.453, "step": 12321 }, { "epoch": 1.79, "grad_norm": 6.109644889831543, "learning_rate": 7.540461482607604e-07, "loss": 0.5248, "step": 12322 }, { "epoch": 1.79, "grad_norm": 6.439640045166016, "learning_rate": 7.538927309592886e-07, "loss": 0.5114, "step": 12323 }, { "epoch": 1.79, "grad_norm": 6.944678783416748, "learning_rate": 7.537393198236351e-07, "loss": 0.547, "step": 12324 }, { "epoch": 1.79, "grad_norm": 6.557678699493408, "learning_rate": 7.535859148576426e-07, "loss": 0.5281, "step": 12325 }, { "epoch": 1.79, "grad_norm": 6.776534080505371, "learning_rate": 7.534325160651546e-07, "loss": 0.514, "step": 12326 }, { "epoch": 1.79, "grad_norm": 7.315981864929199, "learning_rate": 7.532791234500142e-07, "loss": 0.5145, "step": 12327 }, { "epoch": 1.79, "grad_norm": 6.527387619018555, "learning_rate": 7.531257370160647e-07, "loss": 0.5261, "step": 12328 }, { "epoch": 1.79, "grad_norm": 6.891139030456543, "learning_rate": 7.529723567671484e-07, "loss": 0.5085, "step": 12329 }, { "epoch": 1.79, "grad_norm": 7.107299327850342, "learning_rate": 7.528189827071088e-07, "loss": 0.5951, "step": 12330 }, { "epoch": 1.79, "grad_norm": 7.565052032470703, "learning_rate": 7.526656148397874e-07, "loss": 0.5617, "step": 12331 }, { "epoch": 1.79, "grad_norm": 6.6989898681640625, "learning_rate": 7.525122531690275e-07, "loss": 0.5553, "step": 12332 }, { "epoch": 1.79, "grad_norm": 6.824155807495117, "learning_rate": 7.52358897698671e-07, "loss": 0.6166, "step": 12333 }, { "epoch": 1.79, "grad_norm": 5.552285671234131, "learning_rate": 7.5220554843256e-07, "loss": 0.4303, "step": 12334 }, { "epoch": 1.79, "grad_norm": 6.084712028503418, "learning_rate": 7.52052205374536e-07, "loss": 0.5162, "step": 12335 }, { "epoch": 1.79, "grad_norm": 7.034160614013672, "learning_rate": 7.518988685284414e-07, "loss": 0.558, "step": 12336 }, { "epoch": 1.79, "grad_norm": 6.087764263153076, "learning_rate": 7.517455378981172e-07, "loss": 0.4671, "step": 12337 }, { "epoch": 1.79, "grad_norm": 5.956851959228516, "learning_rate": 7.515922134874057e-07, "loss": 0.4793, "step": 12338 }, { "epoch": 1.79, "grad_norm": 6.394683361053467, "learning_rate": 7.514388953001472e-07, "loss": 0.5313, "step": 12339 }, { "epoch": 1.79, "grad_norm": 6.660313129425049, "learning_rate": 7.512855833401837e-07, "loss": 0.5544, "step": 12340 }, { "epoch": 1.79, "grad_norm": 7.3901519775390625, "learning_rate": 7.511322776113555e-07, "loss": 0.6107, "step": 12341 }, { "epoch": 1.79, "grad_norm": 5.955560207366943, "learning_rate": 7.50978978117504e-07, "loss": 0.5118, "step": 12342 }, { "epoch": 1.79, "grad_norm": 6.201480388641357, "learning_rate": 7.508256848624692e-07, "loss": 0.6041, "step": 12343 }, { "epoch": 1.79, "grad_norm": 5.910171985626221, "learning_rate": 7.506723978500924e-07, "loss": 0.5038, "step": 12344 }, { "epoch": 1.79, "grad_norm": 6.602179527282715, "learning_rate": 7.505191170842131e-07, "loss": 0.5024, "step": 12345 }, { "epoch": 1.79, "grad_norm": 6.097191333770752, "learning_rate": 7.503658425686722e-07, "loss": 0.5178, "step": 12346 }, { "epoch": 1.79, "grad_norm": 5.898277282714844, "learning_rate": 7.502125743073095e-07, "loss": 0.4392, "step": 12347 }, { "epoch": 1.79, "grad_norm": 6.640530586242676, "learning_rate": 7.500593123039648e-07, "loss": 0.5219, "step": 12348 }, { "epoch": 1.79, "grad_norm": 6.101967811584473, "learning_rate": 7.499060565624782e-07, "loss": 0.5248, "step": 12349 }, { "epoch": 1.79, "grad_norm": 7.15219259262085, "learning_rate": 7.497528070866887e-07, "loss": 0.5102, "step": 12350 }, { "epoch": 1.79, "grad_norm": 5.973567485809326, "learning_rate": 7.495995638804362e-07, "loss": 0.4921, "step": 12351 }, { "epoch": 1.79, "grad_norm": 7.65022611618042, "learning_rate": 7.494463269475594e-07, "loss": 0.5731, "step": 12352 }, { "epoch": 1.79, "grad_norm": 7.004607200622559, "learning_rate": 7.492930962918984e-07, "loss": 0.5822, "step": 12353 }, { "epoch": 1.79, "grad_norm": 6.277834892272949, "learning_rate": 7.491398719172911e-07, "loss": 0.5034, "step": 12354 }, { "epoch": 1.79, "grad_norm": 7.296166896820068, "learning_rate": 7.489866538275769e-07, "loss": 0.5382, "step": 12355 }, { "epoch": 1.79, "grad_norm": 5.99431848526001, "learning_rate": 7.488334420265941e-07, "loss": 0.5063, "step": 12356 }, { "epoch": 1.79, "grad_norm": 6.2909255027771, "learning_rate": 7.486802365181814e-07, "loss": 0.5668, "step": 12357 }, { "epoch": 1.79, "grad_norm": 7.3427042961120605, "learning_rate": 7.485270373061767e-07, "loss": 0.5689, "step": 12358 }, { "epoch": 1.79, "grad_norm": 6.551609992980957, "learning_rate": 7.48373844394419e-07, "loss": 0.5385, "step": 12359 }, { "epoch": 1.79, "grad_norm": 5.921170711517334, "learning_rate": 7.482206577867451e-07, "loss": 0.5027, "step": 12360 }, { "epoch": 1.79, "grad_norm": 7.144556999206543, "learning_rate": 7.480674774869941e-07, "loss": 0.4607, "step": 12361 }, { "epoch": 1.79, "grad_norm": 6.320587635040283, "learning_rate": 7.47914303499003e-07, "loss": 0.5044, "step": 12362 }, { "epoch": 1.79, "grad_norm": 6.2926177978515625, "learning_rate": 7.477611358266096e-07, "loss": 0.6006, "step": 12363 }, { "epoch": 1.79, "grad_norm": 6.082335472106934, "learning_rate": 7.476079744736505e-07, "loss": 0.4642, "step": 12364 }, { "epoch": 1.79, "grad_norm": 7.4351983070373535, "learning_rate": 7.474548194439641e-07, "loss": 0.5798, "step": 12365 }, { "epoch": 1.79, "grad_norm": 6.759552001953125, "learning_rate": 7.473016707413864e-07, "loss": 0.5525, "step": 12366 }, { "epoch": 1.79, "grad_norm": 6.658748149871826, "learning_rate": 7.471485283697552e-07, "loss": 0.4722, "step": 12367 }, { "epoch": 1.79, "grad_norm": 6.934394359588623, "learning_rate": 7.469953923329065e-07, "loss": 0.5343, "step": 12368 }, { "epoch": 1.79, "grad_norm": 7.0731658935546875, "learning_rate": 7.468422626346772e-07, "loss": 0.5094, "step": 12369 }, { "epoch": 1.79, "grad_norm": 7.107043743133545, "learning_rate": 7.466891392789037e-07, "loss": 0.5473, "step": 12370 }, { "epoch": 1.79, "grad_norm": 6.138635635375977, "learning_rate": 7.465360222694223e-07, "loss": 0.4783, "step": 12371 }, { "epoch": 1.8, "grad_norm": 6.492652893066406, "learning_rate": 7.463829116100688e-07, "loss": 0.5371, "step": 12372 }, { "epoch": 1.8, "grad_norm": 6.590836048126221, "learning_rate": 7.462298073046792e-07, "loss": 0.5783, "step": 12373 }, { "epoch": 1.8, "grad_norm": 7.181041717529297, "learning_rate": 7.460767093570899e-07, "loss": 0.5826, "step": 12374 }, { "epoch": 1.8, "grad_norm": 6.5184807777404785, "learning_rate": 7.459236177711359e-07, "loss": 0.5264, "step": 12375 }, { "epoch": 1.8, "grad_norm": 6.536161422729492, "learning_rate": 7.457705325506531e-07, "loss": 0.5818, "step": 12376 }, { "epoch": 1.8, "grad_norm": 7.538476943969727, "learning_rate": 7.456174536994759e-07, "loss": 0.5352, "step": 12377 }, { "epoch": 1.8, "grad_norm": 6.824579238891602, "learning_rate": 7.454643812214407e-07, "loss": 0.5273, "step": 12378 }, { "epoch": 1.8, "grad_norm": 6.754522800445557, "learning_rate": 7.453113151203813e-07, "loss": 0.469, "step": 12379 }, { "epoch": 1.8, "grad_norm": 6.178822040557861, "learning_rate": 7.451582554001337e-07, "loss": 0.5289, "step": 12380 }, { "epoch": 1.8, "grad_norm": 6.1714253425598145, "learning_rate": 7.450052020645314e-07, "loss": 0.5545, "step": 12381 }, { "epoch": 1.8, "grad_norm": 6.283390045166016, "learning_rate": 7.448521551174096e-07, "loss": 0.5265, "step": 12382 }, { "epoch": 1.8, "grad_norm": 6.029516696929932, "learning_rate": 7.446991145626026e-07, "loss": 0.4973, "step": 12383 }, { "epoch": 1.8, "grad_norm": 6.645253658294678, "learning_rate": 7.445460804039445e-07, "loss": 0.5322, "step": 12384 }, { "epoch": 1.8, "grad_norm": 6.098289489746094, "learning_rate": 7.44393052645269e-07, "loss": 0.5163, "step": 12385 }, { "epoch": 1.8, "grad_norm": 7.478405952453613, "learning_rate": 7.442400312904106e-07, "loss": 0.5996, "step": 12386 }, { "epoch": 1.8, "grad_norm": 6.646485328674316, "learning_rate": 7.440870163432023e-07, "loss": 0.5535, "step": 12387 }, { "epoch": 1.8, "grad_norm": 5.670719623565674, "learning_rate": 7.439340078074783e-07, "loss": 0.4985, "step": 12388 }, { "epoch": 1.8, "grad_norm": 6.881594657897949, "learning_rate": 7.437810056870715e-07, "loss": 0.4988, "step": 12389 }, { "epoch": 1.8, "grad_norm": 7.154091835021973, "learning_rate": 7.436280099858154e-07, "loss": 0.4897, "step": 12390 }, { "epoch": 1.8, "grad_norm": 5.978325366973877, "learning_rate": 7.434750207075427e-07, "loss": 0.4565, "step": 12391 }, { "epoch": 1.8, "grad_norm": 7.116941928863525, "learning_rate": 7.433220378560869e-07, "loss": 0.5705, "step": 12392 }, { "epoch": 1.8, "grad_norm": 6.373172760009766, "learning_rate": 7.431690614352799e-07, "loss": 0.4782, "step": 12393 }, { "epoch": 1.8, "grad_norm": 7.239816188812256, "learning_rate": 7.430160914489554e-07, "loss": 0.5758, "step": 12394 }, { "epoch": 1.8, "grad_norm": 7.583924293518066, "learning_rate": 7.428631279009446e-07, "loss": 0.5783, "step": 12395 }, { "epoch": 1.8, "grad_norm": 6.321884632110596, "learning_rate": 7.427101707950807e-07, "loss": 0.5479, "step": 12396 }, { "epoch": 1.8, "grad_norm": 6.490196704864502, "learning_rate": 7.425572201351952e-07, "loss": 0.5785, "step": 12397 }, { "epoch": 1.8, "grad_norm": 6.522538185119629, "learning_rate": 7.424042759251201e-07, "loss": 0.4913, "step": 12398 }, { "epoch": 1.8, "grad_norm": 6.6630167961120605, "learning_rate": 7.422513381686877e-07, "loss": 0.5187, "step": 12399 }, { "epoch": 1.8, "grad_norm": 6.662533283233643, "learning_rate": 7.420984068697289e-07, "loss": 0.5656, "step": 12400 }, { "epoch": 1.8, "grad_norm": 6.542872428894043, "learning_rate": 7.419454820320758e-07, "loss": 0.4942, "step": 12401 }, { "epoch": 1.8, "grad_norm": 6.8216328620910645, "learning_rate": 7.417925636595592e-07, "loss": 0.5868, "step": 12402 }, { "epoch": 1.8, "grad_norm": 6.138669490814209, "learning_rate": 7.416396517560105e-07, "loss": 0.5456, "step": 12403 }, { "epoch": 1.8, "grad_norm": 5.696855068206787, "learning_rate": 7.414867463252601e-07, "loss": 0.498, "step": 12404 }, { "epoch": 1.8, "grad_norm": 6.825714111328125, "learning_rate": 7.413338473711398e-07, "loss": 0.5986, "step": 12405 }, { "epoch": 1.8, "grad_norm": 6.156954288482666, "learning_rate": 7.411809548974791e-07, "loss": 0.5804, "step": 12406 }, { "epoch": 1.8, "grad_norm": 6.8683342933654785, "learning_rate": 7.410280689081096e-07, "loss": 0.5336, "step": 12407 }, { "epoch": 1.8, "grad_norm": 6.510320663452148, "learning_rate": 7.408751894068606e-07, "loss": 0.504, "step": 12408 }, { "epoch": 1.8, "grad_norm": 7.254638671875, "learning_rate": 7.407223163975629e-07, "loss": 0.579, "step": 12409 }, { "epoch": 1.8, "grad_norm": 6.264765739440918, "learning_rate": 7.405694498840463e-07, "loss": 0.5525, "step": 12410 }, { "epoch": 1.8, "grad_norm": 6.338350296020508, "learning_rate": 7.404165898701408e-07, "loss": 0.5003, "step": 12411 }, { "epoch": 1.8, "grad_norm": 6.368931293487549, "learning_rate": 7.402637363596753e-07, "loss": 0.5144, "step": 12412 }, { "epoch": 1.8, "grad_norm": 8.149391174316406, "learning_rate": 7.401108893564805e-07, "loss": 0.5126, "step": 12413 }, { "epoch": 1.8, "grad_norm": 7.460134506225586, "learning_rate": 7.399580488643844e-07, "loss": 0.5137, "step": 12414 }, { "epoch": 1.8, "grad_norm": 6.6537370681762695, "learning_rate": 7.398052148872175e-07, "loss": 0.6076, "step": 12415 }, { "epoch": 1.8, "grad_norm": 6.731461048126221, "learning_rate": 7.396523874288076e-07, "loss": 0.5104, "step": 12416 }, { "epoch": 1.8, "grad_norm": 6.964995384216309, "learning_rate": 7.394995664929846e-07, "loss": 0.5501, "step": 12417 }, { "epoch": 1.8, "grad_norm": 5.875654220581055, "learning_rate": 7.393467520835766e-07, "loss": 0.442, "step": 12418 }, { "epoch": 1.8, "grad_norm": 6.294137954711914, "learning_rate": 7.391939442044121e-07, "loss": 0.4971, "step": 12419 }, { "epoch": 1.8, "grad_norm": 7.589890480041504, "learning_rate": 7.390411428593195e-07, "loss": 0.5554, "step": 12420 }, { "epoch": 1.8, "grad_norm": 7.646334648132324, "learning_rate": 7.388883480521273e-07, "loss": 0.6505, "step": 12421 }, { "epoch": 1.8, "grad_norm": 6.356682777404785, "learning_rate": 7.387355597866629e-07, "loss": 0.4844, "step": 12422 }, { "epoch": 1.8, "grad_norm": 6.451960563659668, "learning_rate": 7.385827780667549e-07, "loss": 0.5454, "step": 12423 }, { "epoch": 1.8, "grad_norm": 6.799034595489502, "learning_rate": 7.384300028962305e-07, "loss": 0.5793, "step": 12424 }, { "epoch": 1.8, "grad_norm": 5.976377964019775, "learning_rate": 7.38277234278917e-07, "loss": 0.5105, "step": 12425 }, { "epoch": 1.8, "grad_norm": 6.881580352783203, "learning_rate": 7.381244722186428e-07, "loss": 0.5981, "step": 12426 }, { "epoch": 1.8, "grad_norm": 6.952371597290039, "learning_rate": 7.379717167192338e-07, "loss": 0.6681, "step": 12427 }, { "epoch": 1.8, "grad_norm": 7.473722457885742, "learning_rate": 7.378189677845184e-07, "loss": 0.5575, "step": 12428 }, { "epoch": 1.8, "grad_norm": 6.374607086181641, "learning_rate": 7.376662254183222e-07, "loss": 0.5335, "step": 12429 }, { "epoch": 1.8, "grad_norm": 5.813887119293213, "learning_rate": 7.375134896244728e-07, "loss": 0.5564, "step": 12430 }, { "epoch": 1.8, "grad_norm": 6.785673141479492, "learning_rate": 7.373607604067961e-07, "loss": 0.6113, "step": 12431 }, { "epoch": 1.8, "grad_norm": 6.230395317077637, "learning_rate": 7.372080377691192e-07, "loss": 0.4687, "step": 12432 }, { "epoch": 1.8, "grad_norm": 6.7849531173706055, "learning_rate": 7.370553217152675e-07, "loss": 0.5846, "step": 12433 }, { "epoch": 1.8, "grad_norm": 6.488998889923096, "learning_rate": 7.369026122490681e-07, "loss": 0.5252, "step": 12434 }, { "epoch": 1.8, "grad_norm": 6.235602855682373, "learning_rate": 7.367499093743454e-07, "loss": 0.4865, "step": 12435 }, { "epoch": 1.8, "grad_norm": 6.503982067108154, "learning_rate": 7.365972130949267e-07, "loss": 0.4827, "step": 12436 }, { "epoch": 1.8, "grad_norm": 7.041769981384277, "learning_rate": 7.364445234146366e-07, "loss": 0.5611, "step": 12437 }, { "epoch": 1.8, "grad_norm": 6.395988941192627, "learning_rate": 7.362918403373006e-07, "loss": 0.4943, "step": 12438 }, { "epoch": 1.8, "grad_norm": 6.3762593269348145, "learning_rate": 7.361391638667443e-07, "loss": 0.5035, "step": 12439 }, { "epoch": 1.8, "grad_norm": 6.292174816131592, "learning_rate": 7.359864940067925e-07, "loss": 0.5124, "step": 12440 }, { "epoch": 1.81, "grad_norm": 6.692317962646484, "learning_rate": 7.358338307612697e-07, "loss": 0.52, "step": 12441 }, { "epoch": 1.81, "grad_norm": 6.144531726837158, "learning_rate": 7.356811741340016e-07, "loss": 0.5529, "step": 12442 }, { "epoch": 1.81, "grad_norm": 6.994798183441162, "learning_rate": 7.355285241288117e-07, "loss": 0.5495, "step": 12443 }, { "epoch": 1.81, "grad_norm": 6.600606441497803, "learning_rate": 7.353758807495254e-07, "loss": 0.5643, "step": 12444 }, { "epoch": 1.81, "grad_norm": 6.028946876525879, "learning_rate": 7.352232439999661e-07, "loss": 0.4897, "step": 12445 }, { "epoch": 1.81, "grad_norm": 6.967463493347168, "learning_rate": 7.350706138839585e-07, "loss": 0.5322, "step": 12446 }, { "epoch": 1.81, "grad_norm": 6.342487335205078, "learning_rate": 7.349179904053258e-07, "loss": 0.5417, "step": 12447 }, { "epoch": 1.81, "grad_norm": 6.911485195159912, "learning_rate": 7.347653735678926e-07, "loss": 0.5394, "step": 12448 }, { "epoch": 1.81, "grad_norm": 6.91694974899292, "learning_rate": 7.346127633754815e-07, "loss": 0.588, "step": 12449 }, { "epoch": 1.81, "grad_norm": 6.149013042449951, "learning_rate": 7.344601598319167e-07, "loss": 0.4993, "step": 12450 }, { "epoch": 1.81, "grad_norm": 6.6629133224487305, "learning_rate": 7.343075629410209e-07, "loss": 0.5299, "step": 12451 }, { "epoch": 1.81, "grad_norm": 6.90740966796875, "learning_rate": 7.341549727066177e-07, "loss": 0.5311, "step": 12452 }, { "epoch": 1.81, "grad_norm": 6.472512245178223, "learning_rate": 7.340023891325298e-07, "loss": 0.4735, "step": 12453 }, { "epoch": 1.81, "grad_norm": 6.939558982849121, "learning_rate": 7.338498122225793e-07, "loss": 0.5577, "step": 12454 }, { "epoch": 1.81, "grad_norm": 6.454801082611084, "learning_rate": 7.3369724198059e-07, "loss": 0.5676, "step": 12455 }, { "epoch": 1.81, "grad_norm": 6.247126579284668, "learning_rate": 7.335446784103829e-07, "loss": 0.4759, "step": 12456 }, { "epoch": 1.81, "grad_norm": 7.131679534912109, "learning_rate": 7.333921215157816e-07, "loss": 0.5288, "step": 12457 }, { "epoch": 1.81, "grad_norm": 7.600639343261719, "learning_rate": 7.332395713006072e-07, "loss": 0.5786, "step": 12458 }, { "epoch": 1.81, "grad_norm": 5.682115077972412, "learning_rate": 7.33087027768682e-07, "loss": 0.4815, "step": 12459 }, { "epoch": 1.81, "grad_norm": 6.726275444030762, "learning_rate": 7.329344909238274e-07, "loss": 0.5273, "step": 12460 }, { "epoch": 1.81, "grad_norm": 7.330649375915527, "learning_rate": 7.327819607698656e-07, "loss": 0.5838, "step": 12461 }, { "epoch": 1.81, "grad_norm": 6.501834392547607, "learning_rate": 7.32629437310617e-07, "loss": 0.6147, "step": 12462 }, { "epoch": 1.81, "grad_norm": 6.328659534454346, "learning_rate": 7.32476920549904e-07, "loss": 0.4978, "step": 12463 }, { "epoch": 1.81, "grad_norm": 6.319029331207275, "learning_rate": 7.323244104915464e-07, "loss": 0.6202, "step": 12464 }, { "epoch": 1.81, "grad_norm": 7.379667282104492, "learning_rate": 7.321719071393664e-07, "loss": 0.58, "step": 12465 }, { "epoch": 1.81, "grad_norm": 5.79461145401001, "learning_rate": 7.320194104971839e-07, "loss": 0.4638, "step": 12466 }, { "epoch": 1.81, "grad_norm": 7.546436309814453, "learning_rate": 7.318669205688197e-07, "loss": 0.5329, "step": 12467 }, { "epoch": 1.81, "grad_norm": 5.9750285148620605, "learning_rate": 7.317144373580939e-07, "loss": 0.4813, "step": 12468 }, { "epoch": 1.81, "grad_norm": 6.902876853942871, "learning_rate": 7.315619608688273e-07, "loss": 0.6117, "step": 12469 }, { "epoch": 1.81, "grad_norm": 6.489882469177246, "learning_rate": 7.314094911048392e-07, "loss": 0.5728, "step": 12470 }, { "epoch": 1.81, "grad_norm": 6.669498920440674, "learning_rate": 7.312570280699503e-07, "loss": 0.5533, "step": 12471 }, { "epoch": 1.81, "grad_norm": 6.948774337768555, "learning_rate": 7.311045717679797e-07, "loss": 0.5718, "step": 12472 }, { "epoch": 1.81, "grad_norm": 7.345036506652832, "learning_rate": 7.30952122202747e-07, "loss": 0.5014, "step": 12473 }, { "epoch": 1.81, "grad_norm": 5.842824459075928, "learning_rate": 7.307996793780719e-07, "loss": 0.5273, "step": 12474 }, { "epoch": 1.81, "grad_norm": 6.580438613891602, "learning_rate": 7.306472432977736e-07, "loss": 0.5403, "step": 12475 }, { "epoch": 1.81, "grad_norm": 5.752638339996338, "learning_rate": 7.304948139656706e-07, "loss": 0.4977, "step": 12476 }, { "epoch": 1.81, "grad_norm": 6.520016193389893, "learning_rate": 7.30342391385582e-07, "loss": 0.5766, "step": 12477 }, { "epoch": 1.81, "grad_norm": 6.10981559753418, "learning_rate": 7.301899755613271e-07, "loss": 0.4904, "step": 12478 }, { "epoch": 1.81, "grad_norm": 5.754114627838135, "learning_rate": 7.300375664967237e-07, "loss": 0.5377, "step": 12479 }, { "epoch": 1.81, "grad_norm": 6.249324798583984, "learning_rate": 7.298851641955908e-07, "loss": 0.4691, "step": 12480 }, { "epoch": 1.81, "grad_norm": 6.74753475189209, "learning_rate": 7.297327686617458e-07, "loss": 0.4622, "step": 12481 }, { "epoch": 1.81, "grad_norm": 6.149725437164307, "learning_rate": 7.295803798990075e-07, "loss": 0.5236, "step": 12482 }, { "epoch": 1.81, "grad_norm": 6.226953983306885, "learning_rate": 7.294279979111928e-07, "loss": 0.494, "step": 12483 }, { "epoch": 1.81, "grad_norm": 6.328827857971191, "learning_rate": 7.292756227021206e-07, "loss": 0.5116, "step": 12484 }, { "epoch": 1.81, "grad_norm": 7.063549518585205, "learning_rate": 7.291232542756076e-07, "loss": 0.5289, "step": 12485 }, { "epoch": 1.81, "grad_norm": 6.542887210845947, "learning_rate": 7.289708926354712e-07, "loss": 0.5134, "step": 12486 }, { "epoch": 1.81, "grad_norm": 7.2186479568481445, "learning_rate": 7.288185377855289e-07, "loss": 0.59, "step": 12487 }, { "epoch": 1.81, "grad_norm": 7.276336669921875, "learning_rate": 7.286661897295976e-07, "loss": 0.5752, "step": 12488 }, { "epoch": 1.81, "grad_norm": 6.905727863311768, "learning_rate": 7.285138484714935e-07, "loss": 0.5669, "step": 12489 }, { "epoch": 1.81, "grad_norm": 5.766725063323975, "learning_rate": 7.283615140150344e-07, "loss": 0.461, "step": 12490 }, { "epoch": 1.81, "grad_norm": 7.3058552742004395, "learning_rate": 7.282091863640359e-07, "loss": 0.5779, "step": 12491 }, { "epoch": 1.81, "grad_norm": 7.293422698974609, "learning_rate": 7.280568655223147e-07, "loss": 0.523, "step": 12492 }, { "epoch": 1.81, "grad_norm": 7.419482231140137, "learning_rate": 7.27904551493687e-07, "loss": 0.6123, "step": 12493 }, { "epoch": 1.81, "grad_norm": 7.362110614776611, "learning_rate": 7.277522442819686e-07, "loss": 0.5779, "step": 12494 }, { "epoch": 1.81, "grad_norm": 6.453719615936279, "learning_rate": 7.27599943890975e-07, "loss": 0.5113, "step": 12495 }, { "epoch": 1.81, "grad_norm": 6.468165397644043, "learning_rate": 7.274476503245229e-07, "loss": 0.5367, "step": 12496 }, { "epoch": 1.81, "grad_norm": 7.150845527648926, "learning_rate": 7.272953635864263e-07, "loss": 0.4981, "step": 12497 }, { "epoch": 1.81, "grad_norm": 6.1222825050354, "learning_rate": 7.271430836805017e-07, "loss": 0.4613, "step": 12498 }, { "epoch": 1.81, "grad_norm": 6.486866474151611, "learning_rate": 7.269908106105636e-07, "loss": 0.5971, "step": 12499 }, { "epoch": 1.81, "grad_norm": 6.593808650970459, "learning_rate": 7.268385443804275e-07, "loss": 0.5437, "step": 12500 }, { "epoch": 1.81, "grad_norm": 7.235062122344971, "learning_rate": 7.266862849939075e-07, "loss": 0.5781, "step": 12501 }, { "epoch": 1.81, "grad_norm": 6.203893661499023, "learning_rate": 7.265340324548184e-07, "loss": 0.5057, "step": 12502 }, { "epoch": 1.81, "grad_norm": 6.669858455657959, "learning_rate": 7.263817867669752e-07, "loss": 0.562, "step": 12503 }, { "epoch": 1.81, "grad_norm": 6.2050933837890625, "learning_rate": 7.262295479341913e-07, "loss": 0.5181, "step": 12504 }, { "epoch": 1.81, "grad_norm": 6.889961242675781, "learning_rate": 7.260773159602818e-07, "loss": 0.4881, "step": 12505 }, { "epoch": 1.81, "grad_norm": 5.85438346862793, "learning_rate": 7.259250908490598e-07, "loss": 0.5426, "step": 12506 }, { "epoch": 1.81, "grad_norm": 7.374849796295166, "learning_rate": 7.257728726043392e-07, "loss": 0.5589, "step": 12507 }, { "epoch": 1.81, "grad_norm": 6.677483081817627, "learning_rate": 7.256206612299338e-07, "loss": 0.5517, "step": 12508 }, { "epoch": 1.82, "grad_norm": 6.350024223327637, "learning_rate": 7.254684567296571e-07, "loss": 0.5305, "step": 12509 }, { "epoch": 1.82, "grad_norm": 7.206902027130127, "learning_rate": 7.253162591073217e-07, "loss": 0.4956, "step": 12510 }, { "epoch": 1.82, "grad_norm": 5.989011287689209, "learning_rate": 7.251640683667416e-07, "loss": 0.4989, "step": 12511 }, { "epoch": 1.82, "grad_norm": 6.352183818817139, "learning_rate": 7.250118845117288e-07, "loss": 0.5739, "step": 12512 }, { "epoch": 1.82, "grad_norm": 7.041607856750488, "learning_rate": 7.248597075460967e-07, "loss": 0.4987, "step": 12513 }, { "epoch": 1.82, "grad_norm": 7.010243892669678, "learning_rate": 7.247075374736576e-07, "loss": 0.6054, "step": 12514 }, { "epoch": 1.82, "grad_norm": 6.671907424926758, "learning_rate": 7.245553742982238e-07, "loss": 0.536, "step": 12515 }, { "epoch": 1.82, "grad_norm": 6.593270301818848, "learning_rate": 7.244032180236073e-07, "loss": 0.4733, "step": 12516 }, { "epoch": 1.82, "grad_norm": 6.199219226837158, "learning_rate": 7.242510686536208e-07, "loss": 0.6097, "step": 12517 }, { "epoch": 1.82, "grad_norm": 6.1865410804748535, "learning_rate": 7.240989261920751e-07, "loss": 0.5197, "step": 12518 }, { "epoch": 1.82, "grad_norm": 6.175308704376221, "learning_rate": 7.239467906427832e-07, "loss": 0.5695, "step": 12519 }, { "epoch": 1.82, "grad_norm": 6.013772010803223, "learning_rate": 7.237946620095555e-07, "loss": 0.516, "step": 12520 }, { "epoch": 1.82, "grad_norm": 7.388974189758301, "learning_rate": 7.236425402962038e-07, "loss": 0.5707, "step": 12521 }, { "epoch": 1.82, "grad_norm": 6.811976909637451, "learning_rate": 7.234904255065395e-07, "loss": 0.5517, "step": 12522 }, { "epoch": 1.82, "grad_norm": 7.021703243255615, "learning_rate": 7.233383176443732e-07, "loss": 0.5956, "step": 12523 }, { "epoch": 1.82, "grad_norm": 7.346940040588379, "learning_rate": 7.231862167135155e-07, "loss": 0.5738, "step": 12524 }, { "epoch": 1.82, "grad_norm": 6.77782678604126, "learning_rate": 7.230341227177778e-07, "loss": 0.547, "step": 12525 }, { "epoch": 1.82, "grad_norm": 6.27913236618042, "learning_rate": 7.228820356609697e-07, "loss": 0.5233, "step": 12526 }, { "epoch": 1.82, "grad_norm": 5.873827934265137, "learning_rate": 7.227299555469023e-07, "loss": 0.5679, "step": 12527 }, { "epoch": 1.82, "grad_norm": 7.35415506362915, "learning_rate": 7.225778823793852e-07, "loss": 0.5451, "step": 12528 }, { "epoch": 1.82, "grad_norm": 6.314331531524658, "learning_rate": 7.224258161622283e-07, "loss": 0.5139, "step": 12529 }, { "epoch": 1.82, "grad_norm": 6.151082515716553, "learning_rate": 7.222737568992419e-07, "loss": 0.5341, "step": 12530 }, { "epoch": 1.82, "grad_norm": 6.1949462890625, "learning_rate": 7.221217045942349e-07, "loss": 0.5398, "step": 12531 }, { "epoch": 1.82, "grad_norm": 6.951354503631592, "learning_rate": 7.219696592510175e-07, "loss": 0.5615, "step": 12532 }, { "epoch": 1.82, "grad_norm": 6.940424919128418, "learning_rate": 7.218176208733981e-07, "loss": 0.6262, "step": 12533 }, { "epoch": 1.82, "grad_norm": 6.025196552276611, "learning_rate": 7.216655894651866e-07, "loss": 0.4509, "step": 12534 }, { "epoch": 1.82, "grad_norm": 6.474498748779297, "learning_rate": 7.215135650301913e-07, "loss": 0.5242, "step": 12535 }, { "epoch": 1.82, "grad_norm": 6.752216815948486, "learning_rate": 7.213615475722214e-07, "loss": 0.5474, "step": 12536 }, { "epoch": 1.82, "grad_norm": 7.129445552825928, "learning_rate": 7.212095370950848e-07, "loss": 0.5062, "step": 12537 }, { "epoch": 1.82, "grad_norm": 7.062613010406494, "learning_rate": 7.210575336025906e-07, "loss": 0.5232, "step": 12538 }, { "epoch": 1.82, "grad_norm": 7.355691909790039, "learning_rate": 7.209055370985464e-07, "loss": 0.6077, "step": 12539 }, { "epoch": 1.82, "grad_norm": 6.791723728179932, "learning_rate": 7.207535475867608e-07, "loss": 0.5625, "step": 12540 }, { "epoch": 1.82, "grad_norm": 6.804980754852295, "learning_rate": 7.206015650710411e-07, "loss": 0.5648, "step": 12541 }, { "epoch": 1.82, "grad_norm": 7.794633388519287, "learning_rate": 7.204495895551952e-07, "loss": 0.5164, "step": 12542 }, { "epoch": 1.82, "grad_norm": 6.9402995109558105, "learning_rate": 7.202976210430306e-07, "loss": 0.5371, "step": 12543 }, { "epoch": 1.82, "grad_norm": 5.915365219116211, "learning_rate": 7.201456595383548e-07, "loss": 0.5261, "step": 12544 }, { "epoch": 1.82, "grad_norm": 7.498355388641357, "learning_rate": 7.199937050449745e-07, "loss": 0.5747, "step": 12545 }, { "epoch": 1.82, "grad_norm": 6.131109237670898, "learning_rate": 7.198417575666973e-07, "loss": 0.5335, "step": 12546 }, { "epoch": 1.82, "grad_norm": 7.158319473266602, "learning_rate": 7.196898171073293e-07, "loss": 0.5046, "step": 12547 }, { "epoch": 1.82, "grad_norm": 6.669244766235352, "learning_rate": 7.195378836706778e-07, "loss": 0.5894, "step": 12548 }, { "epoch": 1.82, "grad_norm": 6.495095252990723, "learning_rate": 7.193859572605489e-07, "loss": 0.5238, "step": 12549 }, { "epoch": 1.82, "grad_norm": 6.814632415771484, "learning_rate": 7.192340378807489e-07, "loss": 0.4847, "step": 12550 }, { "epoch": 1.82, "grad_norm": 5.8393073081970215, "learning_rate": 7.190821255350835e-07, "loss": 0.4899, "step": 12551 }, { "epoch": 1.82, "grad_norm": 6.497196674346924, "learning_rate": 7.189302202273595e-07, "loss": 0.5214, "step": 12552 }, { "epoch": 1.82, "grad_norm": 6.023679256439209, "learning_rate": 7.187783219613817e-07, "loss": 0.5012, "step": 12553 }, { "epoch": 1.82, "grad_norm": 6.169140338897705, "learning_rate": 7.186264307409562e-07, "loss": 0.5165, "step": 12554 }, { "epoch": 1.82, "grad_norm": 7.302877426147461, "learning_rate": 7.184745465698885e-07, "loss": 0.6041, "step": 12555 }, { "epoch": 1.82, "grad_norm": 6.805825233459473, "learning_rate": 7.183226694519835e-07, "loss": 0.5156, "step": 12556 }, { "epoch": 1.82, "grad_norm": 6.396744728088379, "learning_rate": 7.181707993910464e-07, "loss": 0.5318, "step": 12557 }, { "epoch": 1.82, "grad_norm": 6.482295036315918, "learning_rate": 7.180189363908817e-07, "loss": 0.5042, "step": 12558 }, { "epoch": 1.82, "grad_norm": 7.506539821624756, "learning_rate": 7.178670804552949e-07, "loss": 0.6369, "step": 12559 }, { "epoch": 1.82, "grad_norm": 6.685135364532471, "learning_rate": 7.177152315880895e-07, "loss": 0.528, "step": 12560 }, { "epoch": 1.82, "grad_norm": 6.537661552429199, "learning_rate": 7.175633897930706e-07, "loss": 0.5397, "step": 12561 }, { "epoch": 1.82, "grad_norm": 6.4825358390808105, "learning_rate": 7.17411555074042e-07, "loss": 0.5335, "step": 12562 }, { "epoch": 1.82, "grad_norm": 6.579501152038574, "learning_rate": 7.17259727434808e-07, "loss": 0.5646, "step": 12563 }, { "epoch": 1.82, "grad_norm": 6.236721038818359, "learning_rate": 7.171079068791715e-07, "loss": 0.4907, "step": 12564 }, { "epoch": 1.82, "grad_norm": 6.698861598968506, "learning_rate": 7.169560934109373e-07, "loss": 0.5551, "step": 12565 }, { "epoch": 1.82, "grad_norm": 6.105212688446045, "learning_rate": 7.168042870339077e-07, "loss": 0.495, "step": 12566 }, { "epoch": 1.82, "grad_norm": 6.027642250061035, "learning_rate": 7.166524877518871e-07, "loss": 0.5362, "step": 12567 }, { "epoch": 1.82, "grad_norm": 6.533984661102295, "learning_rate": 7.165006955686776e-07, "loss": 0.5799, "step": 12568 }, { "epoch": 1.82, "grad_norm": 6.02028751373291, "learning_rate": 7.16348910488083e-07, "loss": 0.5393, "step": 12569 }, { "epoch": 1.82, "grad_norm": 6.091993808746338, "learning_rate": 7.161971325139054e-07, "loss": 0.5415, "step": 12570 }, { "epoch": 1.82, "grad_norm": 7.110718250274658, "learning_rate": 7.160453616499476e-07, "loss": 0.494, "step": 12571 }, { "epoch": 1.82, "grad_norm": 6.138510704040527, "learning_rate": 7.158935979000113e-07, "loss": 0.5266, "step": 12572 }, { "epoch": 1.82, "grad_norm": 6.950648784637451, "learning_rate": 7.157418412678999e-07, "loss": 0.5154, "step": 12573 }, { "epoch": 1.82, "grad_norm": 6.434041976928711, "learning_rate": 7.155900917574145e-07, "loss": 0.5571, "step": 12574 }, { "epoch": 1.82, "grad_norm": 7.096704006195068, "learning_rate": 7.154383493723574e-07, "loss": 0.5357, "step": 12575 }, { "epoch": 1.82, "grad_norm": 5.773373603820801, "learning_rate": 7.152866141165299e-07, "loss": 0.4994, "step": 12576 }, { "epoch": 1.82, "grad_norm": 6.743208885192871, "learning_rate": 7.151348859937339e-07, "loss": 0.5137, "step": 12577 }, { "epoch": 1.83, "grad_norm": 6.487629413604736, "learning_rate": 7.149831650077704e-07, "loss": 0.5036, "step": 12578 }, { "epoch": 1.83, "grad_norm": 6.416956424713135, "learning_rate": 7.148314511624407e-07, "loss": 0.5203, "step": 12579 }, { "epoch": 1.83, "grad_norm": 6.8425984382629395, "learning_rate": 7.146797444615453e-07, "loss": 0.6104, "step": 12580 }, { "epoch": 1.83, "grad_norm": 6.900712966918945, "learning_rate": 7.145280449088853e-07, "loss": 0.5329, "step": 12581 }, { "epoch": 1.83, "grad_norm": 6.385718822479248, "learning_rate": 7.143763525082618e-07, "loss": 0.4961, "step": 12582 }, { "epoch": 1.83, "grad_norm": 6.672927379608154, "learning_rate": 7.142246672634744e-07, "loss": 0.5736, "step": 12583 }, { "epoch": 1.83, "grad_norm": 7.446636199951172, "learning_rate": 7.140729891783237e-07, "loss": 0.6521, "step": 12584 }, { "epoch": 1.83, "grad_norm": 5.646495819091797, "learning_rate": 7.139213182566094e-07, "loss": 0.4939, "step": 12585 }, { "epoch": 1.83, "grad_norm": 6.390223979949951, "learning_rate": 7.13769654502132e-07, "loss": 0.5388, "step": 12586 }, { "epoch": 1.83, "grad_norm": 6.9643659591674805, "learning_rate": 7.136179979186904e-07, "loss": 0.5598, "step": 12587 }, { "epoch": 1.83, "grad_norm": 6.789828777313232, "learning_rate": 7.134663485100852e-07, "loss": 0.5549, "step": 12588 }, { "epoch": 1.83, "grad_norm": 7.073267936706543, "learning_rate": 7.133147062801143e-07, "loss": 0.5653, "step": 12589 }, { "epoch": 1.83, "grad_norm": 6.389991283416748, "learning_rate": 7.131630712325783e-07, "loss": 0.4633, "step": 12590 }, { "epoch": 1.83, "grad_norm": 5.829792499542236, "learning_rate": 7.130114433712751e-07, "loss": 0.4718, "step": 12591 }, { "epoch": 1.83, "grad_norm": 7.097365379333496, "learning_rate": 7.128598227000043e-07, "loss": 0.5134, "step": 12592 }, { "epoch": 1.83, "grad_norm": 6.3252997398376465, "learning_rate": 7.127082092225634e-07, "loss": 0.4654, "step": 12593 }, { "epoch": 1.83, "grad_norm": 6.485024929046631, "learning_rate": 7.125566029427521e-07, "loss": 0.5448, "step": 12594 }, { "epoch": 1.83, "grad_norm": 6.720816135406494, "learning_rate": 7.124050038643677e-07, "loss": 0.6144, "step": 12595 }, { "epoch": 1.83, "grad_norm": 7.0278778076171875, "learning_rate": 7.122534119912089e-07, "loss": 0.5316, "step": 12596 }, { "epoch": 1.83, "grad_norm": 6.264552593231201, "learning_rate": 7.121018273270732e-07, "loss": 0.5063, "step": 12597 }, { "epoch": 1.83, "grad_norm": 6.386725425720215, "learning_rate": 7.119502498757583e-07, "loss": 0.5369, "step": 12598 }, { "epoch": 1.83, "grad_norm": 6.774592876434326, "learning_rate": 7.11798679641062e-07, "loss": 0.5316, "step": 12599 }, { "epoch": 1.83, "grad_norm": 6.397944450378418, "learning_rate": 7.116471166267816e-07, "loss": 0.5842, "step": 12600 }, { "epoch": 1.83, "grad_norm": 7.294824123382568, "learning_rate": 7.114955608367139e-07, "loss": 0.5133, "step": 12601 }, { "epoch": 1.83, "grad_norm": 6.8400115966796875, "learning_rate": 7.113440122746565e-07, "loss": 0.5386, "step": 12602 }, { "epoch": 1.83, "grad_norm": 6.150945663452148, "learning_rate": 7.111924709444055e-07, "loss": 0.5045, "step": 12603 }, { "epoch": 1.83, "grad_norm": 7.030449390411377, "learning_rate": 7.110409368497583e-07, "loss": 0.6151, "step": 12604 }, { "epoch": 1.83, "grad_norm": 7.10664176940918, "learning_rate": 7.108894099945107e-07, "loss": 0.5302, "step": 12605 }, { "epoch": 1.83, "grad_norm": 7.2151265144348145, "learning_rate": 7.107378903824592e-07, "loss": 0.566, "step": 12606 }, { "epoch": 1.83, "grad_norm": 7.49774694442749, "learning_rate": 7.105863780173995e-07, "loss": 0.6196, "step": 12607 }, { "epoch": 1.83, "grad_norm": 6.310940265655518, "learning_rate": 7.10434872903128e-07, "loss": 0.522, "step": 12608 }, { "epoch": 1.83, "grad_norm": 6.822059631347656, "learning_rate": 7.102833750434406e-07, "loss": 0.4829, "step": 12609 }, { "epoch": 1.83, "grad_norm": 7.317372798919678, "learning_rate": 7.101318844421323e-07, "loss": 0.5431, "step": 12610 }, { "epoch": 1.83, "grad_norm": 8.314519882202148, "learning_rate": 7.099804011029986e-07, "loss": 0.5603, "step": 12611 }, { "epoch": 1.83, "grad_norm": 6.153129577636719, "learning_rate": 7.098289250298348e-07, "loss": 0.4956, "step": 12612 }, { "epoch": 1.83, "grad_norm": 5.839688777923584, "learning_rate": 7.096774562264361e-07, "loss": 0.5245, "step": 12613 }, { "epoch": 1.83, "grad_norm": 8.2291259765625, "learning_rate": 7.095259946965964e-07, "loss": 0.616, "step": 12614 }, { "epoch": 1.83, "grad_norm": 6.911228656768799, "learning_rate": 7.093745404441113e-07, "loss": 0.4853, "step": 12615 }, { "epoch": 1.83, "grad_norm": 7.413379192352295, "learning_rate": 7.092230934727747e-07, "loss": 0.5498, "step": 12616 }, { "epoch": 1.83, "grad_norm": 7.2752838134765625, "learning_rate": 7.090716537863813e-07, "loss": 0.5708, "step": 12617 }, { "epoch": 1.83, "grad_norm": 6.348428249359131, "learning_rate": 7.089202213887247e-07, "loss": 0.5036, "step": 12618 }, { "epoch": 1.83, "grad_norm": 6.407506465911865, "learning_rate": 7.087687962835993e-07, "loss": 0.5731, "step": 12619 }, { "epoch": 1.83, "grad_norm": 6.323663234710693, "learning_rate": 7.08617378474798e-07, "loss": 0.5346, "step": 12620 }, { "epoch": 1.83, "grad_norm": 6.578364372253418, "learning_rate": 7.084659679661153e-07, "loss": 0.5172, "step": 12621 }, { "epoch": 1.83, "grad_norm": 6.782736301422119, "learning_rate": 7.083145647613436e-07, "loss": 0.4985, "step": 12622 }, { "epoch": 1.83, "grad_norm": 6.54237699508667, "learning_rate": 7.081631688642769e-07, "loss": 0.4837, "step": 12623 }, { "epoch": 1.83, "grad_norm": 5.992173671722412, "learning_rate": 7.080117802787076e-07, "loss": 0.5535, "step": 12624 }, { "epoch": 1.83, "grad_norm": 5.877194881439209, "learning_rate": 7.07860399008429e-07, "loss": 0.5113, "step": 12625 }, { "epoch": 1.83, "grad_norm": 5.673493385314941, "learning_rate": 7.077090250572332e-07, "loss": 0.496, "step": 12626 }, { "epoch": 1.83, "grad_norm": 7.055243492126465, "learning_rate": 7.075576584289132e-07, "loss": 0.6171, "step": 12627 }, { "epoch": 1.83, "grad_norm": 6.263822078704834, "learning_rate": 7.074062991272602e-07, "loss": 0.5145, "step": 12628 }, { "epoch": 1.83, "grad_norm": 5.461036205291748, "learning_rate": 7.072549471560675e-07, "loss": 0.4743, "step": 12629 }, { "epoch": 1.83, "grad_norm": 6.272284030914307, "learning_rate": 7.071036025191262e-07, "loss": 0.5499, "step": 12630 }, { "epoch": 1.83, "grad_norm": 6.5708417892456055, "learning_rate": 7.069522652202286e-07, "loss": 0.5404, "step": 12631 }, { "epoch": 1.83, "grad_norm": 7.263172626495361, "learning_rate": 7.068009352631656e-07, "loss": 0.5999, "step": 12632 }, { "epoch": 1.83, "grad_norm": 6.65378999710083, "learning_rate": 7.066496126517286e-07, "loss": 0.5151, "step": 12633 }, { "epoch": 1.83, "grad_norm": 7.027369022369385, "learning_rate": 7.064982973897092e-07, "loss": 0.558, "step": 12634 }, { "epoch": 1.83, "grad_norm": 5.664677143096924, "learning_rate": 7.063469894808976e-07, "loss": 0.5303, "step": 12635 }, { "epoch": 1.83, "grad_norm": 5.916688442230225, "learning_rate": 7.061956889290857e-07, "loss": 0.5115, "step": 12636 }, { "epoch": 1.83, "grad_norm": 7.8132524490356445, "learning_rate": 7.060443957380627e-07, "loss": 0.5001, "step": 12637 }, { "epoch": 1.83, "grad_norm": 6.969934940338135, "learning_rate": 7.058931099116204e-07, "loss": 0.4877, "step": 12638 }, { "epoch": 1.83, "grad_norm": 6.189064025878906, "learning_rate": 7.057418314535481e-07, "loss": 0.5278, "step": 12639 }, { "epoch": 1.83, "grad_norm": 6.053227424621582, "learning_rate": 7.055905603676361e-07, "loss": 0.529, "step": 12640 }, { "epoch": 1.83, "grad_norm": 7.058919429779053, "learning_rate": 7.054392966576742e-07, "loss": 0.559, "step": 12641 }, { "epoch": 1.83, "grad_norm": 6.162011623382568, "learning_rate": 7.052880403274524e-07, "loss": 0.5348, "step": 12642 }, { "epoch": 1.83, "grad_norm": 6.743869304656982, "learning_rate": 7.051367913807593e-07, "loss": 0.527, "step": 12643 }, { "epoch": 1.83, "grad_norm": 6.515260219573975, "learning_rate": 7.049855498213853e-07, "loss": 0.4874, "step": 12644 }, { "epoch": 1.83, "grad_norm": 6.556314945220947, "learning_rate": 7.048343156531189e-07, "loss": 0.6043, "step": 12645 }, { "epoch": 1.83, "grad_norm": 6.605673789978027, "learning_rate": 7.04683088879749e-07, "loss": 0.5876, "step": 12646 }, { "epoch": 1.84, "grad_norm": 6.129958152770996, "learning_rate": 7.045318695050646e-07, "loss": 0.4832, "step": 12647 }, { "epoch": 1.84, "grad_norm": 6.8637566566467285, "learning_rate": 7.043806575328541e-07, "loss": 0.492, "step": 12648 }, { "epoch": 1.84, "grad_norm": 7.105936527252197, "learning_rate": 7.042294529669058e-07, "loss": 0.5719, "step": 12649 }, { "epoch": 1.84, "grad_norm": 6.341182708740234, "learning_rate": 7.040782558110081e-07, "loss": 0.5282, "step": 12650 }, { "epoch": 1.84, "grad_norm": 6.748471736907959, "learning_rate": 7.039270660689487e-07, "loss": 0.5268, "step": 12651 }, { "epoch": 1.84, "grad_norm": 6.492860317230225, "learning_rate": 7.037758837445158e-07, "loss": 0.523, "step": 12652 }, { "epoch": 1.84, "grad_norm": 7.499214172363281, "learning_rate": 7.036247088414967e-07, "loss": 0.6823, "step": 12653 }, { "epoch": 1.84, "grad_norm": 6.122351169586182, "learning_rate": 7.034735413636792e-07, "loss": 0.5035, "step": 12654 }, { "epoch": 1.84, "grad_norm": 6.266186237335205, "learning_rate": 7.033223813148499e-07, "loss": 0.498, "step": 12655 }, { "epoch": 1.84, "grad_norm": 6.678210735321045, "learning_rate": 7.031712286987966e-07, "loss": 0.607, "step": 12656 }, { "epoch": 1.84, "grad_norm": 6.161315441131592, "learning_rate": 7.030200835193056e-07, "loss": 0.5074, "step": 12657 }, { "epoch": 1.84, "grad_norm": 5.626999378204346, "learning_rate": 7.028689457801642e-07, "loss": 0.5079, "step": 12658 }, { "epoch": 1.84, "grad_norm": 6.587088584899902, "learning_rate": 7.02717815485158e-07, "loss": 0.5016, "step": 12659 }, { "epoch": 1.84, "grad_norm": 6.279638767242432, "learning_rate": 7.025666926380744e-07, "loss": 0.503, "step": 12660 }, { "epoch": 1.84, "grad_norm": 6.074860572814941, "learning_rate": 7.024155772426992e-07, "loss": 0.494, "step": 12661 }, { "epoch": 1.84, "grad_norm": 6.887739658355713, "learning_rate": 7.022644693028177e-07, "loss": 0.5671, "step": 12662 }, { "epoch": 1.84, "grad_norm": 6.522488117218018, "learning_rate": 7.021133688222165e-07, "loss": 0.5641, "step": 12663 }, { "epoch": 1.84, "grad_norm": 8.256787300109863, "learning_rate": 7.019622758046805e-07, "loss": 0.605, "step": 12664 }, { "epoch": 1.84, "grad_norm": 6.652737140655518, "learning_rate": 7.018111902539958e-07, "loss": 0.5776, "step": 12665 }, { "epoch": 1.84, "grad_norm": 6.326657772064209, "learning_rate": 7.01660112173947e-07, "loss": 0.4869, "step": 12666 }, { "epoch": 1.84, "grad_norm": 6.355654239654541, "learning_rate": 7.015090415683194e-07, "loss": 0.5029, "step": 12667 }, { "epoch": 1.84, "grad_norm": 6.59501838684082, "learning_rate": 7.013579784408977e-07, "loss": 0.6117, "step": 12668 }, { "epoch": 1.84, "grad_norm": 6.12533712387085, "learning_rate": 7.012069227954669e-07, "loss": 0.5496, "step": 12669 }, { "epoch": 1.84, "grad_norm": 6.5780792236328125, "learning_rate": 7.010558746358106e-07, "loss": 0.5019, "step": 12670 }, { "epoch": 1.84, "grad_norm": 6.729022979736328, "learning_rate": 7.009048339657142e-07, "loss": 0.5918, "step": 12671 }, { "epoch": 1.84, "grad_norm": 6.270429611206055, "learning_rate": 7.007538007889607e-07, "loss": 0.5665, "step": 12672 }, { "epoch": 1.84, "grad_norm": 6.835890769958496, "learning_rate": 7.006027751093349e-07, "loss": 0.5272, "step": 12673 }, { "epoch": 1.84, "grad_norm": 6.84719705581665, "learning_rate": 7.0045175693062e-07, "loss": 0.6305, "step": 12674 }, { "epoch": 1.84, "grad_norm": 7.480362892150879, "learning_rate": 7.003007462565998e-07, "loss": 0.6465, "step": 12675 }, { "epoch": 1.84, "grad_norm": 7.091612815856934, "learning_rate": 7.00149743091057e-07, "loss": 0.5329, "step": 12676 }, { "epoch": 1.84, "grad_norm": 6.530051231384277, "learning_rate": 6.999987474377755e-07, "loss": 0.5473, "step": 12677 }, { "epoch": 1.84, "grad_norm": 7.125072956085205, "learning_rate": 6.998477593005376e-07, "loss": 0.5908, "step": 12678 }, { "epoch": 1.84, "grad_norm": 6.569007873535156, "learning_rate": 6.996967786831268e-07, "loss": 0.5699, "step": 12679 }, { "epoch": 1.84, "grad_norm": 7.135061740875244, "learning_rate": 6.995458055893248e-07, "loss": 0.548, "step": 12680 }, { "epoch": 1.84, "grad_norm": 6.303440093994141, "learning_rate": 6.99394840022915e-07, "loss": 0.4563, "step": 12681 }, { "epoch": 1.84, "grad_norm": 6.439273357391357, "learning_rate": 6.992438819876787e-07, "loss": 0.5209, "step": 12682 }, { "epoch": 1.84, "grad_norm": 6.751865386962891, "learning_rate": 6.990929314873985e-07, "loss": 0.4899, "step": 12683 }, { "epoch": 1.84, "grad_norm": 6.830300331115723, "learning_rate": 6.989419885258557e-07, "loss": 0.4952, "step": 12684 }, { "epoch": 1.84, "grad_norm": 7.6047139167785645, "learning_rate": 6.987910531068322e-07, "loss": 0.5329, "step": 12685 }, { "epoch": 1.84, "grad_norm": 6.817808151245117, "learning_rate": 6.986401252341097e-07, "loss": 0.5298, "step": 12686 }, { "epoch": 1.84, "grad_norm": 6.690836429595947, "learning_rate": 6.98489204911469e-07, "loss": 0.5416, "step": 12687 }, { "epoch": 1.84, "grad_norm": 6.866029739379883, "learning_rate": 6.983382921426916e-07, "loss": 0.5715, "step": 12688 }, { "epoch": 1.84, "grad_norm": 6.917782306671143, "learning_rate": 6.98187386931558e-07, "loss": 0.5415, "step": 12689 }, { "epoch": 1.84, "grad_norm": 6.479127407073975, "learning_rate": 6.980364892818491e-07, "loss": 0.5476, "step": 12690 }, { "epoch": 1.84, "grad_norm": 6.395106315612793, "learning_rate": 6.978855991973451e-07, "loss": 0.5011, "step": 12691 }, { "epoch": 1.84, "grad_norm": 6.403256893157959, "learning_rate": 6.977347166818268e-07, "loss": 0.5373, "step": 12692 }, { "epoch": 1.84, "grad_norm": 6.1436262130737305, "learning_rate": 6.975838417390737e-07, "loss": 0.5189, "step": 12693 }, { "epoch": 1.84, "grad_norm": 5.906546592712402, "learning_rate": 6.974329743728665e-07, "loss": 0.5356, "step": 12694 }, { "epoch": 1.84, "grad_norm": 6.807794094085693, "learning_rate": 6.972821145869843e-07, "loss": 0.5289, "step": 12695 }, { "epoch": 1.84, "grad_norm": 7.406466960906982, "learning_rate": 6.97131262385207e-07, "loss": 0.473, "step": 12696 }, { "epoch": 1.84, "grad_norm": 6.4319748878479, "learning_rate": 6.969804177713135e-07, "loss": 0.5179, "step": 12697 }, { "epoch": 1.84, "grad_norm": 6.359991073608398, "learning_rate": 6.968295807490836e-07, "loss": 0.4671, "step": 12698 }, { "epoch": 1.84, "grad_norm": 6.686303615570068, "learning_rate": 6.966787513222955e-07, "loss": 0.5963, "step": 12699 }, { "epoch": 1.84, "grad_norm": 6.55600643157959, "learning_rate": 6.965279294947291e-07, "loss": 0.5462, "step": 12700 }, { "epoch": 1.84, "grad_norm": 6.5298566818237305, "learning_rate": 6.963771152701618e-07, "loss": 0.5326, "step": 12701 }, { "epoch": 1.84, "grad_norm": 6.430877208709717, "learning_rate": 6.962263086523727e-07, "loss": 0.5924, "step": 12702 }, { "epoch": 1.84, "grad_norm": 6.2766642570495605, "learning_rate": 6.9607550964514e-07, "loss": 0.5185, "step": 12703 }, { "epoch": 1.84, "grad_norm": 6.573617935180664, "learning_rate": 6.959247182522416e-07, "loss": 0.5867, "step": 12704 }, { "epoch": 1.84, "grad_norm": 6.995018482208252, "learning_rate": 6.95773934477455e-07, "loss": 0.6295, "step": 12705 }, { "epoch": 1.84, "grad_norm": 6.772429943084717, "learning_rate": 6.956231583245585e-07, "loss": 0.5373, "step": 12706 }, { "epoch": 1.84, "grad_norm": 6.2097907066345215, "learning_rate": 6.954723897973287e-07, "loss": 0.5837, "step": 12707 }, { "epoch": 1.84, "grad_norm": 7.578275203704834, "learning_rate": 6.953216288995439e-07, "loss": 0.6113, "step": 12708 }, { "epoch": 1.84, "grad_norm": 7.203464031219482, "learning_rate": 6.951708756349804e-07, "loss": 0.6712, "step": 12709 }, { "epoch": 1.84, "grad_norm": 6.022926330566406, "learning_rate": 6.950201300074156e-07, "loss": 0.517, "step": 12710 }, { "epoch": 1.84, "grad_norm": 6.710977077484131, "learning_rate": 6.948693920206254e-07, "loss": 0.6613, "step": 12711 }, { "epoch": 1.84, "grad_norm": 6.03170919418335, "learning_rate": 6.947186616783869e-07, "loss": 0.5103, "step": 12712 }, { "epoch": 1.84, "grad_norm": 7.654092311859131, "learning_rate": 6.945679389844765e-07, "loss": 0.5452, "step": 12713 }, { "epoch": 1.84, "grad_norm": 6.997279167175293, "learning_rate": 6.944172239426701e-07, "loss": 0.4999, "step": 12714 }, { "epoch": 1.84, "grad_norm": 7.1030592918396, "learning_rate": 6.942665165567436e-07, "loss": 0.5465, "step": 12715 }, { "epoch": 1.85, "grad_norm": 6.9309916496276855, "learning_rate": 6.941158168304729e-07, "loss": 0.586, "step": 12716 }, { "epoch": 1.85, "grad_norm": 6.52720832824707, "learning_rate": 6.939651247676332e-07, "loss": 0.5289, "step": 12717 }, { "epoch": 1.85, "grad_norm": 6.9598002433776855, "learning_rate": 6.93814440372e-07, "loss": 0.5404, "step": 12718 }, { "epoch": 1.85, "grad_norm": 7.015082836151123, "learning_rate": 6.936637636473488e-07, "loss": 0.5725, "step": 12719 }, { "epoch": 1.85, "grad_norm": 6.711231708526611, "learning_rate": 6.935130945974539e-07, "loss": 0.5219, "step": 12720 }, { "epoch": 1.85, "grad_norm": 7.271054744720459, "learning_rate": 6.933624332260908e-07, "loss": 0.5187, "step": 12721 }, { "epoch": 1.85, "grad_norm": 6.492790222167969, "learning_rate": 6.932117795370333e-07, "loss": 0.5183, "step": 12722 }, { "epoch": 1.85, "grad_norm": 5.792990207672119, "learning_rate": 6.930611335340566e-07, "loss": 0.5054, "step": 12723 }, { "epoch": 1.85, "grad_norm": 6.761744022369385, "learning_rate": 6.929104952209342e-07, "loss": 0.5015, "step": 12724 }, { "epoch": 1.85, "grad_norm": 6.539551258087158, "learning_rate": 6.927598646014406e-07, "loss": 0.5058, "step": 12725 }, { "epoch": 1.85, "grad_norm": 6.9417924880981445, "learning_rate": 6.92609241679349e-07, "loss": 0.5845, "step": 12726 }, { "epoch": 1.85, "grad_norm": 7.75313138961792, "learning_rate": 6.924586264584337e-07, "loss": 0.5899, "step": 12727 }, { "epoch": 1.85, "grad_norm": 6.0536208152771, "learning_rate": 6.923080189424675e-07, "loss": 0.4922, "step": 12728 }, { "epoch": 1.85, "grad_norm": 6.944636821746826, "learning_rate": 6.921574191352243e-07, "loss": 0.5184, "step": 12729 }, { "epoch": 1.85, "grad_norm": 6.775444507598877, "learning_rate": 6.920068270404766e-07, "loss": 0.5497, "step": 12730 }, { "epoch": 1.85, "grad_norm": 6.605119705200195, "learning_rate": 6.918562426619976e-07, "loss": 0.511, "step": 12731 }, { "epoch": 1.85, "grad_norm": 7.756500720977783, "learning_rate": 6.917056660035594e-07, "loss": 0.5581, "step": 12732 }, { "epoch": 1.85, "grad_norm": 7.641776084899902, "learning_rate": 6.915550970689352e-07, "loss": 0.6087, "step": 12733 }, { "epoch": 1.85, "grad_norm": 6.79690408706665, "learning_rate": 6.914045358618965e-07, "loss": 0.5146, "step": 12734 }, { "epoch": 1.85, "grad_norm": 6.858240604400635, "learning_rate": 6.91253982386216e-07, "loss": 0.6101, "step": 12735 }, { "epoch": 1.85, "grad_norm": 6.4797492027282715, "learning_rate": 6.911034366456651e-07, "loss": 0.5136, "step": 12736 }, { "epoch": 1.85, "grad_norm": 6.8988542556762695, "learning_rate": 6.909528986440158e-07, "loss": 0.5593, "step": 12737 }, { "epoch": 1.85, "grad_norm": 5.788909435272217, "learning_rate": 6.908023683850397e-07, "loss": 0.517, "step": 12738 }, { "epoch": 1.85, "grad_norm": 7.03017520904541, "learning_rate": 6.906518458725075e-07, "loss": 0.527, "step": 12739 }, { "epoch": 1.85, "grad_norm": 6.288749694824219, "learning_rate": 6.905013311101909e-07, "loss": 0.5122, "step": 12740 }, { "epoch": 1.85, "grad_norm": 6.598045349121094, "learning_rate": 6.903508241018603e-07, "loss": 0.5331, "step": 12741 }, { "epoch": 1.85, "grad_norm": 6.968730449676514, "learning_rate": 6.90200324851287e-07, "loss": 0.5011, "step": 12742 }, { "epoch": 1.85, "grad_norm": 6.996326446533203, "learning_rate": 6.900498333622411e-07, "loss": 0.6021, "step": 12743 }, { "epoch": 1.85, "grad_norm": 7.321055889129639, "learning_rate": 6.898993496384931e-07, "loss": 0.5511, "step": 12744 }, { "epoch": 1.85, "grad_norm": 6.1841888427734375, "learning_rate": 6.897488736838127e-07, "loss": 0.5209, "step": 12745 }, { "epoch": 1.85, "grad_norm": 7.108781814575195, "learning_rate": 6.895984055019705e-07, "loss": 0.6014, "step": 12746 }, { "epoch": 1.85, "grad_norm": 6.845930099487305, "learning_rate": 6.894479450967357e-07, "loss": 0.5164, "step": 12747 }, { "epoch": 1.85, "grad_norm": 6.16326904296875, "learning_rate": 6.892974924718782e-07, "loss": 0.5181, "step": 12748 }, { "epoch": 1.85, "grad_norm": 6.639042854309082, "learning_rate": 6.891470476311669e-07, "loss": 0.5213, "step": 12749 }, { "epoch": 1.85, "grad_norm": 6.078150749206543, "learning_rate": 6.889966105783715e-07, "loss": 0.5653, "step": 12750 }, { "epoch": 1.85, "grad_norm": 5.855276584625244, "learning_rate": 6.888461813172608e-07, "loss": 0.4999, "step": 12751 }, { "epoch": 1.85, "grad_norm": 7.200145244598389, "learning_rate": 6.886957598516035e-07, "loss": 0.5509, "step": 12752 }, { "epoch": 1.85, "grad_norm": 6.7346978187561035, "learning_rate": 6.885453461851676e-07, "loss": 0.5451, "step": 12753 }, { "epoch": 1.85, "grad_norm": 6.512875556945801, "learning_rate": 6.883949403217224e-07, "loss": 0.4768, "step": 12754 }, { "epoch": 1.85, "grad_norm": 7.641163349151611, "learning_rate": 6.882445422650354e-07, "loss": 0.5774, "step": 12755 }, { "epoch": 1.85, "grad_norm": 7.096894264221191, "learning_rate": 6.880941520188752e-07, "loss": 0.5085, "step": 12756 }, { "epoch": 1.85, "grad_norm": 6.91860818862915, "learning_rate": 6.879437695870089e-07, "loss": 0.5197, "step": 12757 }, { "epoch": 1.85, "grad_norm": 6.99713134765625, "learning_rate": 6.877933949732046e-07, "loss": 0.515, "step": 12758 }, { "epoch": 1.85, "grad_norm": 6.673231601715088, "learning_rate": 6.876430281812296e-07, "loss": 0.4714, "step": 12759 }, { "epoch": 1.85, "grad_norm": 6.914198398590088, "learning_rate": 6.874926692148509e-07, "loss": 0.5146, "step": 12760 }, { "epoch": 1.85, "grad_norm": 6.898693561553955, "learning_rate": 6.873423180778354e-07, "loss": 0.534, "step": 12761 }, { "epoch": 1.85, "grad_norm": 7.5386481285095215, "learning_rate": 6.871919747739504e-07, "loss": 0.5185, "step": 12762 }, { "epoch": 1.85, "grad_norm": 6.794033527374268, "learning_rate": 6.870416393069618e-07, "loss": 0.6091, "step": 12763 }, { "epoch": 1.85, "grad_norm": 6.096177577972412, "learning_rate": 6.868913116806368e-07, "loss": 0.4536, "step": 12764 }, { "epoch": 1.85, "grad_norm": 6.057788848876953, "learning_rate": 6.867409918987412e-07, "loss": 0.4802, "step": 12765 }, { "epoch": 1.85, "grad_norm": 6.61602783203125, "learning_rate": 6.865906799650408e-07, "loss": 0.4869, "step": 12766 }, { "epoch": 1.85, "grad_norm": 7.057946681976318, "learning_rate": 6.864403758833022e-07, "loss": 0.5561, "step": 12767 }, { "epoch": 1.85, "grad_norm": 6.092167854309082, "learning_rate": 6.8629007965729e-07, "loss": 0.473, "step": 12768 }, { "epoch": 1.85, "grad_norm": 5.898869037628174, "learning_rate": 6.861397912907703e-07, "loss": 0.5181, "step": 12769 }, { "epoch": 1.85, "grad_norm": 6.853118896484375, "learning_rate": 6.859895107875082e-07, "loss": 0.5371, "step": 12770 }, { "epoch": 1.85, "grad_norm": 6.166200160980225, "learning_rate": 6.858392381512686e-07, "loss": 0.5435, "step": 12771 }, { "epoch": 1.85, "grad_norm": 6.655352592468262, "learning_rate": 6.856889733858164e-07, "loss": 0.5622, "step": 12772 }, { "epoch": 1.85, "grad_norm": 6.532016754150391, "learning_rate": 6.855387164949167e-07, "loss": 0.5149, "step": 12773 }, { "epoch": 1.85, "grad_norm": 6.125007629394531, "learning_rate": 6.853884674823329e-07, "loss": 0.4651, "step": 12774 }, { "epoch": 1.85, "grad_norm": 6.818742752075195, "learning_rate": 6.852382263518304e-07, "loss": 0.5093, "step": 12775 }, { "epoch": 1.85, "grad_norm": 6.6771697998046875, "learning_rate": 6.85087993107172e-07, "loss": 0.5343, "step": 12776 }, { "epoch": 1.85, "grad_norm": 6.5025739669799805, "learning_rate": 6.84937767752123e-07, "loss": 0.5389, "step": 12777 }, { "epoch": 1.85, "grad_norm": 6.4020304679870605, "learning_rate": 6.847875502904458e-07, "loss": 0.484, "step": 12778 }, { "epoch": 1.85, "grad_norm": 6.725240707397461, "learning_rate": 6.846373407259049e-07, "loss": 0.5804, "step": 12779 }, { "epoch": 1.85, "grad_norm": 6.920965671539307, "learning_rate": 6.844871390622624e-07, "loss": 0.5911, "step": 12780 }, { "epoch": 1.85, "grad_norm": 6.798309326171875, "learning_rate": 6.843369453032824e-07, "loss": 0.5881, "step": 12781 }, { "epoch": 1.85, "grad_norm": 6.177945137023926, "learning_rate": 6.841867594527271e-07, "loss": 0.5528, "step": 12782 }, { "epoch": 1.85, "grad_norm": 6.3187150955200195, "learning_rate": 6.840365815143597e-07, "loss": 0.5003, "step": 12783 }, { "epoch": 1.85, "grad_norm": 7.287890911102295, "learning_rate": 6.838864114919418e-07, "loss": 0.5337, "step": 12784 }, { "epoch": 1.86, "grad_norm": 7.231480598449707, "learning_rate": 6.837362493892368e-07, "loss": 0.5065, "step": 12785 }, { "epoch": 1.86, "grad_norm": 6.8297224044799805, "learning_rate": 6.835860952100059e-07, "loss": 0.4602, "step": 12786 }, { "epoch": 1.86, "grad_norm": 6.360692501068115, "learning_rate": 6.834359489580115e-07, "loss": 0.5484, "step": 12787 }, { "epoch": 1.86, "grad_norm": 6.013820171356201, "learning_rate": 6.832858106370147e-07, "loss": 0.486, "step": 12788 }, { "epoch": 1.86, "grad_norm": 6.788790702819824, "learning_rate": 6.831356802507778e-07, "loss": 0.5031, "step": 12789 }, { "epoch": 1.86, "grad_norm": 7.328233242034912, "learning_rate": 6.82985557803061e-07, "loss": 0.5539, "step": 12790 }, { "epoch": 1.86, "grad_norm": 6.478183746337891, "learning_rate": 6.828354432976261e-07, "loss": 0.5035, "step": 12791 }, { "epoch": 1.86, "grad_norm": 7.048593044281006, "learning_rate": 6.826853367382341e-07, "loss": 0.5137, "step": 12792 }, { "epoch": 1.86, "grad_norm": 6.416378974914551, "learning_rate": 6.825352381286451e-07, "loss": 0.4766, "step": 12793 }, { "epoch": 1.86, "grad_norm": 7.338348865509033, "learning_rate": 6.823851474726202e-07, "loss": 0.5555, "step": 12794 }, { "epoch": 1.86, "grad_norm": 6.491933345794678, "learning_rate": 6.82235064773919e-07, "loss": 0.5178, "step": 12795 }, { "epoch": 1.86, "grad_norm": 6.497623443603516, "learning_rate": 6.820849900363024e-07, "loss": 0.5152, "step": 12796 }, { "epoch": 1.86, "grad_norm": 6.878032684326172, "learning_rate": 6.819349232635292e-07, "loss": 0.5632, "step": 12797 }, { "epoch": 1.86, "grad_norm": 7.197262763977051, "learning_rate": 6.817848644593603e-07, "loss": 0.5337, "step": 12798 }, { "epoch": 1.86, "grad_norm": 6.2183098793029785, "learning_rate": 6.816348136275543e-07, "loss": 0.5485, "step": 12799 }, { "epoch": 1.86, "grad_norm": 6.9347639083862305, "learning_rate": 6.814847707718709e-07, "loss": 0.53, "step": 12800 }, { "epoch": 1.86, "grad_norm": 6.517423152923584, "learning_rate": 6.813347358960688e-07, "loss": 0.4444, "step": 12801 }, { "epoch": 1.86, "grad_norm": 6.679914474487305, "learning_rate": 6.811847090039073e-07, "loss": 0.535, "step": 12802 }, { "epoch": 1.86, "grad_norm": 5.462991714477539, "learning_rate": 6.810346900991446e-07, "loss": 0.4809, "step": 12803 }, { "epoch": 1.86, "grad_norm": 6.969209671020508, "learning_rate": 6.808846791855399e-07, "loss": 0.5197, "step": 12804 }, { "epoch": 1.86, "grad_norm": 6.366352081298828, "learning_rate": 6.807346762668506e-07, "loss": 0.5071, "step": 12805 }, { "epoch": 1.86, "grad_norm": 8.203301429748535, "learning_rate": 6.805846813468358e-07, "loss": 0.6004, "step": 12806 }, { "epoch": 1.86, "grad_norm": 6.830564498901367, "learning_rate": 6.804346944292525e-07, "loss": 0.5542, "step": 12807 }, { "epoch": 1.86, "grad_norm": 7.411585330963135, "learning_rate": 6.802847155178588e-07, "loss": 0.5414, "step": 12808 }, { "epoch": 1.86, "grad_norm": 6.591519355773926, "learning_rate": 6.801347446164118e-07, "loss": 0.5186, "step": 12809 }, { "epoch": 1.86, "grad_norm": 6.814453601837158, "learning_rate": 6.799847817286694e-07, "loss": 0.5647, "step": 12810 }, { "epoch": 1.86, "grad_norm": 7.084091663360596, "learning_rate": 6.798348268583879e-07, "loss": 0.5672, "step": 12811 }, { "epoch": 1.86, "grad_norm": 6.985785484313965, "learning_rate": 6.796848800093251e-07, "loss": 0.5125, "step": 12812 }, { "epoch": 1.86, "grad_norm": 6.676976203918457, "learning_rate": 6.795349411852369e-07, "loss": 0.5469, "step": 12813 }, { "epoch": 1.86, "grad_norm": 6.2061309814453125, "learning_rate": 6.793850103898802e-07, "loss": 0.5559, "step": 12814 }, { "epoch": 1.86, "grad_norm": 6.4182891845703125, "learning_rate": 6.792350876270107e-07, "loss": 0.5694, "step": 12815 }, { "epoch": 1.86, "grad_norm": 7.598523139953613, "learning_rate": 6.79085172900385e-07, "loss": 0.5929, "step": 12816 }, { "epoch": 1.86, "grad_norm": 5.80228328704834, "learning_rate": 6.789352662137591e-07, "loss": 0.4767, "step": 12817 }, { "epoch": 1.86, "grad_norm": 6.090834617614746, "learning_rate": 6.787853675708883e-07, "loss": 0.5411, "step": 12818 }, { "epoch": 1.86, "grad_norm": 6.288299083709717, "learning_rate": 6.786354769755284e-07, "loss": 0.5122, "step": 12819 }, { "epoch": 1.86, "grad_norm": 6.347929000854492, "learning_rate": 6.784855944314343e-07, "loss": 0.5095, "step": 12820 }, { "epoch": 1.86, "grad_norm": 7.025124549865723, "learning_rate": 6.783357199423614e-07, "loss": 0.5603, "step": 12821 }, { "epoch": 1.86, "grad_norm": 7.313697814941406, "learning_rate": 6.781858535120641e-07, "loss": 0.541, "step": 12822 }, { "epoch": 1.86, "grad_norm": 7.189023971557617, "learning_rate": 6.780359951442975e-07, "loss": 0.5574, "step": 12823 }, { "epoch": 1.86, "grad_norm": 6.663774490356445, "learning_rate": 6.778861448428157e-07, "loss": 0.5396, "step": 12824 }, { "epoch": 1.86, "grad_norm": 6.188589096069336, "learning_rate": 6.777363026113736e-07, "loss": 0.5135, "step": 12825 }, { "epoch": 1.86, "grad_norm": 6.338672161102295, "learning_rate": 6.775864684537244e-07, "loss": 0.5166, "step": 12826 }, { "epoch": 1.86, "grad_norm": 6.360091686248779, "learning_rate": 6.774366423736226e-07, "loss": 0.547, "step": 12827 }, { "epoch": 1.86, "grad_norm": 7.089884281158447, "learning_rate": 6.772868243748215e-07, "loss": 0.603, "step": 12828 }, { "epoch": 1.86, "grad_norm": 6.790706634521484, "learning_rate": 6.771370144610749e-07, "loss": 0.5822, "step": 12829 }, { "epoch": 1.86, "grad_norm": 5.800413131713867, "learning_rate": 6.769872126361354e-07, "loss": 0.4908, "step": 12830 }, { "epoch": 1.86, "grad_norm": 7.023275852203369, "learning_rate": 6.768374189037569e-07, "loss": 0.4805, "step": 12831 }, { "epoch": 1.86, "grad_norm": 6.3664231300354, "learning_rate": 6.766876332676913e-07, "loss": 0.5791, "step": 12832 }, { "epoch": 1.86, "grad_norm": 6.9067606925964355, "learning_rate": 6.76537855731692e-07, "loss": 0.537, "step": 12833 }, { "epoch": 1.86, "grad_norm": 5.320244312286377, "learning_rate": 6.763880862995112e-07, "loss": 0.4887, "step": 12834 }, { "epoch": 1.86, "grad_norm": 5.7736639976501465, "learning_rate": 6.762383249749013e-07, "loss": 0.5113, "step": 12835 }, { "epoch": 1.86, "grad_norm": 6.346868515014648, "learning_rate": 6.760885717616136e-07, "loss": 0.4379, "step": 12836 }, { "epoch": 1.86, "grad_norm": 5.5611796379089355, "learning_rate": 6.759388266634007e-07, "loss": 0.5237, "step": 12837 }, { "epoch": 1.86, "grad_norm": 5.774713039398193, "learning_rate": 6.757890896840137e-07, "loss": 0.4937, "step": 12838 }, { "epoch": 1.86, "grad_norm": 5.896284103393555, "learning_rate": 6.756393608272046e-07, "loss": 0.4965, "step": 12839 }, { "epoch": 1.86, "grad_norm": 6.471075534820557, "learning_rate": 6.75489640096724e-07, "loss": 0.5495, "step": 12840 }, { "epoch": 1.86, "grad_norm": 6.126833915710449, "learning_rate": 6.753399274963236e-07, "loss": 0.4722, "step": 12841 }, { "epoch": 1.86, "grad_norm": 6.226141452789307, "learning_rate": 6.751902230297535e-07, "loss": 0.5092, "step": 12842 }, { "epoch": 1.86, "grad_norm": 6.593786239624023, "learning_rate": 6.750405267007644e-07, "loss": 0.4468, "step": 12843 }, { "epoch": 1.86, "grad_norm": 6.318471431732178, "learning_rate": 6.748908385131074e-07, "loss": 0.4993, "step": 12844 }, { "epoch": 1.86, "grad_norm": 7.372002124786377, "learning_rate": 6.747411584705316e-07, "loss": 0.5633, "step": 12845 }, { "epoch": 1.86, "grad_norm": 6.380067348480225, "learning_rate": 6.745914865767881e-07, "loss": 0.4899, "step": 12846 }, { "epoch": 1.86, "grad_norm": 6.21286153793335, "learning_rate": 6.744418228356261e-07, "loss": 0.4938, "step": 12847 }, { "epoch": 1.86, "grad_norm": 6.366415023803711, "learning_rate": 6.742921672507953e-07, "loss": 0.5828, "step": 12848 }, { "epoch": 1.86, "grad_norm": 5.908890247344971, "learning_rate": 6.741425198260446e-07, "loss": 0.4601, "step": 12849 }, { "epoch": 1.86, "grad_norm": 6.739516735076904, "learning_rate": 6.73992880565124e-07, "loss": 0.5205, "step": 12850 }, { "epoch": 1.86, "grad_norm": 5.863845348358154, "learning_rate": 6.738432494717819e-07, "loss": 0.4527, "step": 12851 }, { "epoch": 1.86, "grad_norm": 6.328537940979004, "learning_rate": 6.736936265497674e-07, "loss": 0.5224, "step": 12852 }, { "epoch": 1.86, "grad_norm": 6.0573506355285645, "learning_rate": 6.735440118028286e-07, "loss": 0.5589, "step": 12853 }, { "epoch": 1.87, "grad_norm": 6.906479358673096, "learning_rate": 6.733944052347144e-07, "loss": 0.5118, "step": 12854 }, { "epoch": 1.87, "grad_norm": 7.615565299987793, "learning_rate": 6.732448068491727e-07, "loss": 0.5478, "step": 12855 }, { "epoch": 1.87, "grad_norm": 6.3436455726623535, "learning_rate": 6.730952166499515e-07, "loss": 0.5044, "step": 12856 }, { "epoch": 1.87, "grad_norm": 5.878693580627441, "learning_rate": 6.729456346407982e-07, "loss": 0.484, "step": 12857 }, { "epoch": 1.87, "grad_norm": 5.727260112762451, "learning_rate": 6.727960608254611e-07, "loss": 0.4678, "step": 12858 }, { "epoch": 1.87, "grad_norm": 7.371603488922119, "learning_rate": 6.726464952076864e-07, "loss": 0.518, "step": 12859 }, { "epoch": 1.87, "grad_norm": 6.591201305389404, "learning_rate": 6.724969377912224e-07, "loss": 0.5704, "step": 12860 }, { "epoch": 1.87, "grad_norm": 6.301693439483643, "learning_rate": 6.723473885798152e-07, "loss": 0.5036, "step": 12861 }, { "epoch": 1.87, "grad_norm": 7.082736015319824, "learning_rate": 6.721978475772119e-07, "loss": 0.4981, "step": 12862 }, { "epoch": 1.87, "grad_norm": 6.336228847503662, "learning_rate": 6.720483147871588e-07, "loss": 0.509, "step": 12863 }, { "epoch": 1.87, "grad_norm": 6.582242965698242, "learning_rate": 6.718987902134026e-07, "loss": 0.5749, "step": 12864 }, { "epoch": 1.87, "grad_norm": 7.650717735290527, "learning_rate": 6.717492738596887e-07, "loss": 0.604, "step": 12865 }, { "epoch": 1.87, "grad_norm": 7.925252914428711, "learning_rate": 6.715997657297637e-07, "loss": 0.5551, "step": 12866 }, { "epoch": 1.87, "grad_norm": 6.022579669952393, "learning_rate": 6.714502658273726e-07, "loss": 0.5187, "step": 12867 }, { "epoch": 1.87, "grad_norm": 7.321075916290283, "learning_rate": 6.713007741562615e-07, "loss": 0.5459, "step": 12868 }, { "epoch": 1.87, "grad_norm": 6.186635494232178, "learning_rate": 6.711512907201757e-07, "loss": 0.4538, "step": 12869 }, { "epoch": 1.87, "grad_norm": 6.308111190795898, "learning_rate": 6.710018155228595e-07, "loss": 0.4549, "step": 12870 }, { "epoch": 1.87, "grad_norm": 6.729949951171875, "learning_rate": 6.708523485680586e-07, "loss": 0.5833, "step": 12871 }, { "epoch": 1.87, "grad_norm": 6.058885097503662, "learning_rate": 6.70702889859517e-07, "loss": 0.5034, "step": 12872 }, { "epoch": 1.87, "grad_norm": 7.290458679199219, "learning_rate": 6.705534394009797e-07, "loss": 0.6255, "step": 12873 }, { "epoch": 1.87, "grad_norm": 7.1772284507751465, "learning_rate": 6.704039971961907e-07, "loss": 0.5291, "step": 12874 }, { "epoch": 1.87, "grad_norm": 7.2562408447265625, "learning_rate": 6.702545632488938e-07, "loss": 0.5534, "step": 12875 }, { "epoch": 1.87, "grad_norm": 7.4226274490356445, "learning_rate": 6.701051375628331e-07, "loss": 0.5535, "step": 12876 }, { "epoch": 1.87, "grad_norm": 6.328950881958008, "learning_rate": 6.699557201417525e-07, "loss": 0.4755, "step": 12877 }, { "epoch": 1.87, "grad_norm": 7.707509517669678, "learning_rate": 6.698063109893946e-07, "loss": 0.6544, "step": 12878 }, { "epoch": 1.87, "grad_norm": 6.96749210357666, "learning_rate": 6.696569101095034e-07, "loss": 0.5849, "step": 12879 }, { "epoch": 1.87, "grad_norm": 6.00492000579834, "learning_rate": 6.695075175058211e-07, "loss": 0.5584, "step": 12880 }, { "epoch": 1.87, "grad_norm": 6.49302339553833, "learning_rate": 6.693581331820914e-07, "loss": 0.6107, "step": 12881 }, { "epoch": 1.87, "grad_norm": 6.109567165374756, "learning_rate": 6.692087571420564e-07, "loss": 0.5099, "step": 12882 }, { "epoch": 1.87, "grad_norm": 6.616759300231934, "learning_rate": 6.690593893894585e-07, "loss": 0.5504, "step": 12883 }, { "epoch": 1.87, "grad_norm": 6.396795749664307, "learning_rate": 6.689100299280396e-07, "loss": 0.4898, "step": 12884 }, { "epoch": 1.87, "grad_norm": 6.999815940856934, "learning_rate": 6.687606787615423e-07, "loss": 0.5351, "step": 12885 }, { "epoch": 1.87, "grad_norm": 6.688691139221191, "learning_rate": 6.686113358937075e-07, "loss": 0.4871, "step": 12886 }, { "epoch": 1.87, "grad_norm": 6.255842685699463, "learning_rate": 6.684620013282778e-07, "loss": 0.5116, "step": 12887 }, { "epoch": 1.87, "grad_norm": 5.676555633544922, "learning_rate": 6.683126750689933e-07, "loss": 0.5179, "step": 12888 }, { "epoch": 1.87, "grad_norm": 7.316249370574951, "learning_rate": 6.681633571195965e-07, "loss": 0.5471, "step": 12889 }, { "epoch": 1.87, "grad_norm": 7.038345813751221, "learning_rate": 6.680140474838273e-07, "loss": 0.4919, "step": 12890 }, { "epoch": 1.87, "grad_norm": 6.780351638793945, "learning_rate": 6.678647461654269e-07, "loss": 0.5571, "step": 12891 }, { "epoch": 1.87, "grad_norm": 5.8798675537109375, "learning_rate": 6.677154531681352e-07, "loss": 0.5047, "step": 12892 }, { "epoch": 1.87, "grad_norm": 6.4376749992370605, "learning_rate": 6.675661684956933e-07, "loss": 0.5395, "step": 12893 }, { "epoch": 1.87, "grad_norm": 6.516396999359131, "learning_rate": 6.674168921518406e-07, "loss": 0.5559, "step": 12894 }, { "epoch": 1.87, "grad_norm": 7.358599662780762, "learning_rate": 6.672676241403174e-07, "loss": 0.5325, "step": 12895 }, { "epoch": 1.87, "grad_norm": 7.800686359405518, "learning_rate": 6.671183644648633e-07, "loss": 0.5856, "step": 12896 }, { "epoch": 1.87, "grad_norm": 6.26773738861084, "learning_rate": 6.669691131292175e-07, "loss": 0.4882, "step": 12897 }, { "epoch": 1.87, "grad_norm": 5.834926128387451, "learning_rate": 6.668198701371199e-07, "loss": 0.4847, "step": 12898 }, { "epoch": 1.87, "grad_norm": 6.25845193862915, "learning_rate": 6.666706354923086e-07, "loss": 0.516, "step": 12899 }, { "epoch": 1.87, "grad_norm": 6.10737419128418, "learning_rate": 6.665214091985234e-07, "loss": 0.546, "step": 12900 }, { "epoch": 1.87, "grad_norm": 6.19706916809082, "learning_rate": 6.663721912595019e-07, "loss": 0.5126, "step": 12901 }, { "epoch": 1.87, "grad_norm": 7.3797736167907715, "learning_rate": 6.662229816789835e-07, "loss": 0.5532, "step": 12902 }, { "epoch": 1.87, "grad_norm": 6.192765712738037, "learning_rate": 6.660737804607058e-07, "loss": 0.4912, "step": 12903 }, { "epoch": 1.87, "grad_norm": 5.964660167694092, "learning_rate": 6.659245876084072e-07, "loss": 0.5377, "step": 12904 }, { "epoch": 1.87, "grad_norm": 6.73734712600708, "learning_rate": 6.657754031258247e-07, "loss": 0.5391, "step": 12905 }, { "epoch": 1.87, "grad_norm": 6.486755847930908, "learning_rate": 6.65626227016697e-07, "loss": 0.5305, "step": 12906 }, { "epoch": 1.87, "grad_norm": 6.898074626922607, "learning_rate": 6.654770592847604e-07, "loss": 0.5352, "step": 12907 }, { "epoch": 1.87, "grad_norm": 5.713380336761475, "learning_rate": 6.653278999337529e-07, "loss": 0.4708, "step": 12908 }, { "epoch": 1.87, "grad_norm": 7.116150856018066, "learning_rate": 6.651787489674108e-07, "loss": 0.5698, "step": 12909 }, { "epoch": 1.87, "grad_norm": 6.805396556854248, "learning_rate": 6.650296063894714e-07, "loss": 0.5486, "step": 12910 }, { "epoch": 1.87, "grad_norm": 6.070113182067871, "learning_rate": 6.648804722036707e-07, "loss": 0.4856, "step": 12911 }, { "epoch": 1.87, "grad_norm": 7.396758556365967, "learning_rate": 6.647313464137456e-07, "loss": 0.4951, "step": 12912 }, { "epoch": 1.87, "grad_norm": 6.425875663757324, "learning_rate": 6.645822290234313e-07, "loss": 0.5272, "step": 12913 }, { "epoch": 1.87, "grad_norm": 6.997859954833984, "learning_rate": 6.644331200364648e-07, "loss": 0.5236, "step": 12914 }, { "epoch": 1.87, "grad_norm": 7.422116756439209, "learning_rate": 6.642840194565808e-07, "loss": 0.5574, "step": 12915 }, { "epoch": 1.87, "grad_norm": 7.216044902801514, "learning_rate": 6.641349272875157e-07, "loss": 0.5891, "step": 12916 }, { "epoch": 1.87, "grad_norm": 5.993404388427734, "learning_rate": 6.639858435330039e-07, "loss": 0.5155, "step": 12917 }, { "epoch": 1.87, "grad_norm": 6.499329090118408, "learning_rate": 6.638367681967809e-07, "loss": 0.4832, "step": 12918 }, { "epoch": 1.87, "grad_norm": 6.2752299308776855, "learning_rate": 6.636877012825815e-07, "loss": 0.5578, "step": 12919 }, { "epoch": 1.87, "grad_norm": 6.124327659606934, "learning_rate": 6.635386427941399e-07, "loss": 0.4707, "step": 12920 }, { "epoch": 1.87, "grad_norm": 7.921152591705322, "learning_rate": 6.633895927351916e-07, "loss": 0.6218, "step": 12921 }, { "epoch": 1.87, "grad_norm": 6.169212818145752, "learning_rate": 6.632405511094696e-07, "loss": 0.5224, "step": 12922 }, { "epoch": 1.88, "grad_norm": 7.003262519836426, "learning_rate": 6.630915179207087e-07, "loss": 0.5388, "step": 12923 }, { "epoch": 1.88, "grad_norm": 7.248745441436768, "learning_rate": 6.629424931726424e-07, "loss": 0.4363, "step": 12924 }, { "epoch": 1.88, "grad_norm": 7.018618583679199, "learning_rate": 6.627934768690043e-07, "loss": 0.5062, "step": 12925 }, { "epoch": 1.88, "grad_norm": 7.276618003845215, "learning_rate": 6.626444690135273e-07, "loss": 0.5229, "step": 12926 }, { "epoch": 1.88, "grad_norm": 6.315990447998047, "learning_rate": 6.624954696099457e-07, "loss": 0.5236, "step": 12927 }, { "epoch": 1.88, "grad_norm": 6.699484825134277, "learning_rate": 6.62346478661991e-07, "loss": 0.5099, "step": 12928 }, { "epoch": 1.88, "grad_norm": 5.541433334350586, "learning_rate": 6.621974961733972e-07, "loss": 0.4654, "step": 12929 }, { "epoch": 1.88, "grad_norm": 6.3665547370910645, "learning_rate": 6.62048522147896e-07, "loss": 0.4742, "step": 12930 }, { "epoch": 1.88, "grad_norm": 6.603870391845703, "learning_rate": 6.6189955658922e-07, "loss": 0.4928, "step": 12931 }, { "epoch": 1.88, "grad_norm": 6.743312358856201, "learning_rate": 6.617505995011012e-07, "loss": 0.4824, "step": 12932 }, { "epoch": 1.88, "grad_norm": 6.419093608856201, "learning_rate": 6.616016508872718e-07, "loss": 0.512, "step": 12933 }, { "epoch": 1.88, "grad_norm": 7.291757106781006, "learning_rate": 6.614527107514628e-07, "loss": 0.464, "step": 12934 }, { "epoch": 1.88, "grad_norm": 7.488250255584717, "learning_rate": 6.613037790974064e-07, "loss": 0.5502, "step": 12935 }, { "epoch": 1.88, "grad_norm": 6.607933521270752, "learning_rate": 6.611548559288331e-07, "loss": 0.5625, "step": 12936 }, { "epoch": 1.88, "grad_norm": 7.1859259605407715, "learning_rate": 6.610059412494749e-07, "loss": 0.5617, "step": 12937 }, { "epoch": 1.88, "grad_norm": 6.149204730987549, "learning_rate": 6.608570350630616e-07, "loss": 0.5472, "step": 12938 }, { "epoch": 1.88, "grad_norm": 6.937991619110107, "learning_rate": 6.607081373733244e-07, "loss": 0.5583, "step": 12939 }, { "epoch": 1.88, "grad_norm": 6.836944103240967, "learning_rate": 6.605592481839932e-07, "loss": 0.4813, "step": 12940 }, { "epoch": 1.88, "grad_norm": 7.401187896728516, "learning_rate": 6.60410367498799e-07, "loss": 0.5812, "step": 12941 }, { "epoch": 1.88, "grad_norm": 7.523755073547363, "learning_rate": 6.602614953214708e-07, "loss": 0.5659, "step": 12942 }, { "epoch": 1.88, "grad_norm": 7.29170036315918, "learning_rate": 6.601126316557393e-07, "loss": 0.4782, "step": 12943 }, { "epoch": 1.88, "grad_norm": 6.585175037384033, "learning_rate": 6.59963776505333e-07, "loss": 0.5264, "step": 12944 }, { "epoch": 1.88, "grad_norm": 7.138868808746338, "learning_rate": 6.598149298739822e-07, "loss": 0.528, "step": 12945 }, { "epoch": 1.88, "grad_norm": 6.049439430236816, "learning_rate": 6.596660917654155e-07, "loss": 0.4634, "step": 12946 }, { "epoch": 1.88, "grad_norm": 6.534294605255127, "learning_rate": 6.595172621833616e-07, "loss": 0.5488, "step": 12947 }, { "epoch": 1.88, "grad_norm": 7.363222122192383, "learning_rate": 6.593684411315499e-07, "loss": 0.5552, "step": 12948 }, { "epoch": 1.88, "grad_norm": 6.793720722198486, "learning_rate": 6.59219628613708e-07, "loss": 0.5447, "step": 12949 }, { "epoch": 1.88, "grad_norm": 6.700918674468994, "learning_rate": 6.590708246335651e-07, "loss": 0.5887, "step": 12950 }, { "epoch": 1.88, "grad_norm": 6.649754047393799, "learning_rate": 6.589220291948485e-07, "loss": 0.5505, "step": 12951 }, { "epoch": 1.88, "grad_norm": 6.801697254180908, "learning_rate": 6.587732423012864e-07, "loss": 0.5652, "step": 12952 }, { "epoch": 1.88, "grad_norm": 6.471756458282471, "learning_rate": 6.586244639566059e-07, "loss": 0.52, "step": 12953 }, { "epoch": 1.88, "grad_norm": 6.612122058868408, "learning_rate": 6.584756941645353e-07, "loss": 0.5073, "step": 12954 }, { "epoch": 1.88, "grad_norm": 7.127066135406494, "learning_rate": 6.583269329288006e-07, "loss": 0.5204, "step": 12955 }, { "epoch": 1.88, "grad_norm": 6.678196430206299, "learning_rate": 6.581781802531301e-07, "loss": 0.5515, "step": 12956 }, { "epoch": 1.88, "grad_norm": 6.9750471115112305, "learning_rate": 6.580294361412492e-07, "loss": 0.5277, "step": 12957 }, { "epoch": 1.88, "grad_norm": 6.911440849304199, "learning_rate": 6.578807005968859e-07, "loss": 0.5878, "step": 12958 }, { "epoch": 1.88, "grad_norm": 8.430591583251953, "learning_rate": 6.577319736237654e-07, "loss": 0.541, "step": 12959 }, { "epoch": 1.88, "grad_norm": 7.049753189086914, "learning_rate": 6.575832552256143e-07, "loss": 0.5893, "step": 12960 }, { "epoch": 1.88, "grad_norm": 6.0527801513671875, "learning_rate": 6.57434545406158e-07, "loss": 0.467, "step": 12961 }, { "epoch": 1.88, "grad_norm": 6.667709827423096, "learning_rate": 6.57285844169123e-07, "loss": 0.5314, "step": 12962 }, { "epoch": 1.88, "grad_norm": 5.6073713302612305, "learning_rate": 6.571371515182339e-07, "loss": 0.536, "step": 12963 }, { "epoch": 1.88, "grad_norm": 6.701585292816162, "learning_rate": 6.569884674572169e-07, "loss": 0.4945, "step": 12964 }, { "epoch": 1.88, "grad_norm": 7.288135528564453, "learning_rate": 6.568397919897959e-07, "loss": 0.5498, "step": 12965 }, { "epoch": 1.88, "grad_norm": 6.529656887054443, "learning_rate": 6.566911251196969e-07, "loss": 0.5176, "step": 12966 }, { "epoch": 1.88, "grad_norm": 6.6436333656311035, "learning_rate": 6.565424668506436e-07, "loss": 0.5082, "step": 12967 }, { "epoch": 1.88, "grad_norm": 6.676721572875977, "learning_rate": 6.56393817186361e-07, "loss": 0.5258, "step": 12968 }, { "epoch": 1.88, "grad_norm": 7.263587474822998, "learning_rate": 6.562451761305725e-07, "loss": 0.5227, "step": 12969 }, { "epoch": 1.88, "grad_norm": 6.806744575500488, "learning_rate": 6.560965436870031e-07, "loss": 0.5943, "step": 12970 }, { "epoch": 1.88, "grad_norm": 7.271613597869873, "learning_rate": 6.559479198593756e-07, "loss": 0.5399, "step": 12971 }, { "epoch": 1.88, "grad_norm": 7.215531826019287, "learning_rate": 6.557993046514142e-07, "loss": 0.5319, "step": 12972 }, { "epoch": 1.88, "grad_norm": 6.598719120025635, "learning_rate": 6.556506980668421e-07, "loss": 0.51, "step": 12973 }, { "epoch": 1.88, "grad_norm": 7.44401741027832, "learning_rate": 6.555021001093819e-07, "loss": 0.5117, "step": 12974 }, { "epoch": 1.88, "grad_norm": 6.242506980895996, "learning_rate": 6.55353510782757e-07, "loss": 0.5416, "step": 12975 }, { "epoch": 1.88, "grad_norm": 6.626380443572998, "learning_rate": 6.552049300906898e-07, "loss": 0.5007, "step": 12976 }, { "epoch": 1.88, "grad_norm": 7.308243751525879, "learning_rate": 6.550563580369031e-07, "loss": 0.5688, "step": 12977 }, { "epoch": 1.88, "grad_norm": 6.042571544647217, "learning_rate": 6.549077946251184e-07, "loss": 0.4587, "step": 12978 }, { "epoch": 1.88, "grad_norm": 6.7706451416015625, "learning_rate": 6.547592398590587e-07, "loss": 0.5389, "step": 12979 }, { "epoch": 1.88, "grad_norm": 6.715534687042236, "learning_rate": 6.546106937424451e-07, "loss": 0.5775, "step": 12980 }, { "epoch": 1.88, "grad_norm": 7.5471086502075195, "learning_rate": 6.544621562789996e-07, "loss": 0.5404, "step": 12981 }, { "epoch": 1.88, "grad_norm": 7.0096435546875, "learning_rate": 6.543136274724428e-07, "loss": 0.5503, "step": 12982 }, { "epoch": 1.88, "grad_norm": 7.196796417236328, "learning_rate": 6.541651073264969e-07, "loss": 0.6275, "step": 12983 }, { "epoch": 1.88, "grad_norm": 6.821089267730713, "learning_rate": 6.540165958448818e-07, "loss": 0.5235, "step": 12984 }, { "epoch": 1.88, "grad_norm": 6.897543430328369, "learning_rate": 6.538680930313191e-07, "loss": 0.5395, "step": 12985 }, { "epoch": 1.88, "grad_norm": 6.718451499938965, "learning_rate": 6.537195988895289e-07, "loss": 0.571, "step": 12986 }, { "epoch": 1.88, "grad_norm": 6.276005744934082, "learning_rate": 6.535711134232315e-07, "loss": 0.4963, "step": 12987 }, { "epoch": 1.88, "grad_norm": 6.990967750549316, "learning_rate": 6.534226366361469e-07, "loss": 0.4472, "step": 12988 }, { "epoch": 1.88, "grad_norm": 7.73634147644043, "learning_rate": 6.532741685319951e-07, "loss": 0.5181, "step": 12989 }, { "epoch": 1.88, "grad_norm": 7.184655666351318, "learning_rate": 6.531257091144953e-07, "loss": 0.4866, "step": 12990 }, { "epoch": 1.88, "grad_norm": 7.487980842590332, "learning_rate": 6.529772583873678e-07, "loss": 0.5428, "step": 12991 }, { "epoch": 1.89, "grad_norm": 6.447434425354004, "learning_rate": 6.528288163543307e-07, "loss": 0.5562, "step": 12992 }, { "epoch": 1.89, "grad_norm": 7.539093494415283, "learning_rate": 6.526803830191039e-07, "loss": 0.494, "step": 12993 }, { "epoch": 1.89, "grad_norm": 7.102063179016113, "learning_rate": 6.525319583854055e-07, "loss": 0.5565, "step": 12994 }, { "epoch": 1.89, "grad_norm": 6.92250919342041, "learning_rate": 6.523835424569546e-07, "loss": 0.5595, "step": 12995 }, { "epoch": 1.89, "grad_norm": 6.835840225219727, "learning_rate": 6.522351352374689e-07, "loss": 0.5055, "step": 12996 }, { "epoch": 1.89, "grad_norm": 6.565232753753662, "learning_rate": 6.520867367306672e-07, "loss": 0.5523, "step": 12997 }, { "epoch": 1.89, "grad_norm": 7.04346227645874, "learning_rate": 6.519383469402665e-07, "loss": 0.5997, "step": 12998 }, { "epoch": 1.89, "grad_norm": 7.523022651672363, "learning_rate": 6.517899658699853e-07, "loss": 0.4864, "step": 12999 }, { "epoch": 1.89, "grad_norm": 7.046231269836426, "learning_rate": 6.516415935235406e-07, "loss": 0.5591, "step": 13000 }, { "epoch": 1.89, "grad_norm": 7.434483528137207, "learning_rate": 6.514932299046499e-07, "loss": 0.5759, "step": 13001 }, { "epoch": 1.89, "grad_norm": 7.0616326332092285, "learning_rate": 6.513448750170301e-07, "loss": 0.5743, "step": 13002 }, { "epoch": 1.89, "grad_norm": 6.449273586273193, "learning_rate": 6.511965288643977e-07, "loss": 0.5137, "step": 13003 }, { "epoch": 1.89, "grad_norm": 6.4831061363220215, "learning_rate": 6.510481914504697e-07, "loss": 0.4894, "step": 13004 }, { "epoch": 1.89, "grad_norm": 6.238692283630371, "learning_rate": 6.508998627789621e-07, "loss": 0.5451, "step": 13005 }, { "epoch": 1.89, "grad_norm": 7.158144950866699, "learning_rate": 6.507515428535916e-07, "loss": 0.5382, "step": 13006 }, { "epoch": 1.89, "grad_norm": 7.336775302886963, "learning_rate": 6.506032316780734e-07, "loss": 0.5626, "step": 13007 }, { "epoch": 1.89, "grad_norm": 7.7388434410095215, "learning_rate": 6.504549292561239e-07, "loss": 0.5187, "step": 13008 }, { "epoch": 1.89, "grad_norm": 6.895740032196045, "learning_rate": 6.503066355914577e-07, "loss": 0.5077, "step": 13009 }, { "epoch": 1.89, "grad_norm": 6.561791896820068, "learning_rate": 6.501583506877908e-07, "loss": 0.5436, "step": 13010 }, { "epoch": 1.89, "grad_norm": 6.901901721954346, "learning_rate": 6.500100745488377e-07, "loss": 0.5713, "step": 13011 }, { "epoch": 1.89, "grad_norm": 6.982304096221924, "learning_rate": 6.49861807178314e-07, "loss": 0.5761, "step": 13012 }, { "epoch": 1.89, "grad_norm": 6.762457847595215, "learning_rate": 6.497135485799334e-07, "loss": 0.4992, "step": 13013 }, { "epoch": 1.89, "grad_norm": 5.966675281524658, "learning_rate": 6.495652987574109e-07, "loss": 0.4953, "step": 13014 }, { "epoch": 1.89, "grad_norm": 7.29843282699585, "learning_rate": 6.494170577144605e-07, "loss": 0.5559, "step": 13015 }, { "epoch": 1.89, "grad_norm": 5.929720878601074, "learning_rate": 6.492688254547961e-07, "loss": 0.4784, "step": 13016 }, { "epoch": 1.89, "grad_norm": 5.972328186035156, "learning_rate": 6.491206019821311e-07, "loss": 0.4258, "step": 13017 }, { "epoch": 1.89, "grad_norm": 6.712803363800049, "learning_rate": 6.489723873001795e-07, "loss": 0.5599, "step": 13018 }, { "epoch": 1.89, "grad_norm": 7.359146595001221, "learning_rate": 6.488241814126543e-07, "loss": 0.5347, "step": 13019 }, { "epoch": 1.89, "grad_norm": 7.154025077819824, "learning_rate": 6.486759843232688e-07, "loss": 0.5957, "step": 13020 }, { "epoch": 1.89, "grad_norm": 6.6429524421691895, "learning_rate": 6.485277960357356e-07, "loss": 0.4898, "step": 13021 }, { "epoch": 1.89, "grad_norm": 6.416459083557129, "learning_rate": 6.483796165537672e-07, "loss": 0.4539, "step": 13022 }, { "epoch": 1.89, "grad_norm": 6.5650529861450195, "learning_rate": 6.482314458810764e-07, "loss": 0.5246, "step": 13023 }, { "epoch": 1.89, "grad_norm": 6.699010848999023, "learning_rate": 6.480832840213752e-07, "loss": 0.57, "step": 13024 }, { "epoch": 1.89, "grad_norm": 7.37716817855835, "learning_rate": 6.479351309783752e-07, "loss": 0.5448, "step": 13025 }, { "epoch": 1.89, "grad_norm": 6.751424312591553, "learning_rate": 6.477869867557883e-07, "loss": 0.4819, "step": 13026 }, { "epoch": 1.89, "grad_norm": 7.283400535583496, "learning_rate": 6.476388513573267e-07, "loss": 0.5985, "step": 13027 }, { "epoch": 1.89, "grad_norm": 6.451759338378906, "learning_rate": 6.47490724786701e-07, "loss": 0.5648, "step": 13028 }, { "epoch": 1.89, "grad_norm": 6.867169380187988, "learning_rate": 6.473426070476226e-07, "loss": 0.5207, "step": 13029 }, { "epoch": 1.89, "grad_norm": 6.466559886932373, "learning_rate": 6.471944981438018e-07, "loss": 0.4856, "step": 13030 }, { "epoch": 1.89, "grad_norm": 7.758723735809326, "learning_rate": 6.470463980789499e-07, "loss": 0.582, "step": 13031 }, { "epoch": 1.89, "grad_norm": 6.121469497680664, "learning_rate": 6.468983068567766e-07, "loss": 0.5192, "step": 13032 }, { "epoch": 1.89, "grad_norm": 6.212442874908447, "learning_rate": 6.46750224480993e-07, "loss": 0.4774, "step": 13033 }, { "epoch": 1.89, "grad_norm": 6.8400797843933105, "learning_rate": 6.466021509553082e-07, "loss": 0.5075, "step": 13034 }, { "epoch": 1.89, "grad_norm": 6.163206577301025, "learning_rate": 6.464540862834324e-07, "loss": 0.469, "step": 13035 }, { "epoch": 1.89, "grad_norm": 6.781006813049316, "learning_rate": 6.46306030469075e-07, "loss": 0.4669, "step": 13036 }, { "epoch": 1.89, "grad_norm": 6.641062259674072, "learning_rate": 6.461579835159457e-07, "loss": 0.5569, "step": 13037 }, { "epoch": 1.89, "grad_norm": 7.781569957733154, "learning_rate": 6.460099454277525e-07, "loss": 0.5489, "step": 13038 }, { "epoch": 1.89, "grad_norm": 5.758706569671631, "learning_rate": 6.458619162082054e-07, "loss": 0.4854, "step": 13039 }, { "epoch": 1.89, "grad_norm": 6.868457794189453, "learning_rate": 6.457138958610122e-07, "loss": 0.5125, "step": 13040 }, { "epoch": 1.89, "grad_norm": 7.184030055999756, "learning_rate": 6.455658843898821e-07, "loss": 0.4828, "step": 13041 }, { "epoch": 1.89, "grad_norm": 6.0120625495910645, "learning_rate": 6.454178817985228e-07, "loss": 0.4967, "step": 13042 }, { "epoch": 1.89, "grad_norm": 6.0668535232543945, "learning_rate": 6.452698880906424e-07, "loss": 0.4847, "step": 13043 }, { "epoch": 1.89, "grad_norm": 6.559893608093262, "learning_rate": 6.451219032699481e-07, "loss": 0.5057, "step": 13044 }, { "epoch": 1.89, "grad_norm": 7.523000717163086, "learning_rate": 6.449739273401485e-07, "loss": 0.5745, "step": 13045 }, { "epoch": 1.89, "grad_norm": 6.28355073928833, "learning_rate": 6.448259603049497e-07, "loss": 0.5082, "step": 13046 }, { "epoch": 1.89, "grad_norm": 6.793603897094727, "learning_rate": 6.446780021680599e-07, "loss": 0.5865, "step": 13047 }, { "epoch": 1.89, "grad_norm": 6.457259178161621, "learning_rate": 6.44530052933185e-07, "loss": 0.52, "step": 13048 }, { "epoch": 1.89, "grad_norm": 6.354909420013428, "learning_rate": 6.443821126040323e-07, "loss": 0.5604, "step": 13049 }, { "epoch": 1.89, "grad_norm": 6.5745673179626465, "learning_rate": 6.442341811843079e-07, "loss": 0.5573, "step": 13050 }, { "epoch": 1.89, "grad_norm": 6.5716094970703125, "learning_rate": 6.440862586777179e-07, "loss": 0.5102, "step": 13051 }, { "epoch": 1.89, "grad_norm": 7.774842739105225, "learning_rate": 6.439383450879687e-07, "loss": 0.5641, "step": 13052 }, { "epoch": 1.89, "grad_norm": 6.294943809509277, "learning_rate": 6.437904404187653e-07, "loss": 0.5096, "step": 13053 }, { "epoch": 1.89, "grad_norm": 6.351953983306885, "learning_rate": 6.436425446738141e-07, "loss": 0.4477, "step": 13054 }, { "epoch": 1.89, "grad_norm": 6.42684268951416, "learning_rate": 6.434946578568195e-07, "loss": 0.5152, "step": 13055 }, { "epoch": 1.89, "grad_norm": 6.569042682647705, "learning_rate": 6.433467799714872e-07, "loss": 0.5552, "step": 13056 }, { "epoch": 1.89, "grad_norm": 6.678198337554932, "learning_rate": 6.431989110215218e-07, "loss": 0.5519, "step": 13057 }, { "epoch": 1.89, "grad_norm": 6.151791095733643, "learning_rate": 6.430510510106282e-07, "loss": 0.5356, "step": 13058 }, { "epoch": 1.89, "grad_norm": 6.947995662689209, "learning_rate": 6.429031999425101e-07, "loss": 0.4715, "step": 13059 }, { "epoch": 1.89, "grad_norm": 7.072301387786865, "learning_rate": 6.427553578208724e-07, "loss": 0.614, "step": 13060 }, { "epoch": 1.9, "grad_norm": 6.651301860809326, "learning_rate": 6.426075246494184e-07, "loss": 0.5216, "step": 13061 }, { "epoch": 1.9, "grad_norm": 6.127685070037842, "learning_rate": 6.424597004318526e-07, "loss": 0.5089, "step": 13062 }, { "epoch": 1.9, "grad_norm": 5.781857013702393, "learning_rate": 6.423118851718778e-07, "loss": 0.5029, "step": 13063 }, { "epoch": 1.9, "grad_norm": 6.5275444984436035, "learning_rate": 6.421640788731979e-07, "loss": 0.5424, "step": 13064 }, { "epoch": 1.9, "grad_norm": 7.329829216003418, "learning_rate": 6.42016281539515e-07, "loss": 0.6342, "step": 13065 }, { "epoch": 1.9, "grad_norm": 6.850668907165527, "learning_rate": 6.41868493174533e-07, "loss": 0.4944, "step": 13066 }, { "epoch": 1.9, "grad_norm": 7.003731727600098, "learning_rate": 6.417207137819536e-07, "loss": 0.5672, "step": 13067 }, { "epoch": 1.9, "grad_norm": 6.484017372131348, "learning_rate": 6.415729433654799e-07, "loss": 0.4634, "step": 13068 }, { "epoch": 1.9, "grad_norm": 6.9060378074646, "learning_rate": 6.414251819288134e-07, "loss": 0.5291, "step": 13069 }, { "epoch": 1.9, "grad_norm": 6.176779270172119, "learning_rate": 6.412774294756566e-07, "loss": 0.5101, "step": 13070 }, { "epoch": 1.9, "grad_norm": 6.894355297088623, "learning_rate": 6.411296860097109e-07, "loss": 0.5019, "step": 13071 }, { "epoch": 1.9, "grad_norm": 6.6671462059021, "learning_rate": 6.409819515346781e-07, "loss": 0.5288, "step": 13072 }, { "epoch": 1.9, "grad_norm": 6.785267353057861, "learning_rate": 6.408342260542585e-07, "loss": 0.5133, "step": 13073 }, { "epoch": 1.9, "grad_norm": 6.472533226013184, "learning_rate": 6.406865095721543e-07, "loss": 0.5054, "step": 13074 }, { "epoch": 1.9, "grad_norm": 6.279034614562988, "learning_rate": 6.405388020920655e-07, "loss": 0.4747, "step": 13075 }, { "epoch": 1.9, "grad_norm": 6.4676337242126465, "learning_rate": 6.403911036176932e-07, "loss": 0.5691, "step": 13076 }, { "epoch": 1.9, "grad_norm": 6.270296096801758, "learning_rate": 6.402434141527373e-07, "loss": 0.5735, "step": 13077 }, { "epoch": 1.9, "grad_norm": 8.466111183166504, "learning_rate": 6.40095733700898e-07, "loss": 0.6006, "step": 13078 }, { "epoch": 1.9, "grad_norm": 5.876473426818848, "learning_rate": 6.399480622658757e-07, "loss": 0.5422, "step": 13079 }, { "epoch": 1.9, "grad_norm": 6.516573429107666, "learning_rate": 6.398003998513693e-07, "loss": 0.4987, "step": 13080 }, { "epoch": 1.9, "grad_norm": 6.0902557373046875, "learning_rate": 6.396527464610788e-07, "loss": 0.5163, "step": 13081 }, { "epoch": 1.9, "grad_norm": 6.6813459396362305, "learning_rate": 6.39505102098703e-07, "loss": 0.5478, "step": 13082 }, { "epoch": 1.9, "grad_norm": 7.341728687286377, "learning_rate": 6.393574667679415e-07, "loss": 0.5812, "step": 13083 }, { "epoch": 1.9, "grad_norm": 6.4466681480407715, "learning_rate": 6.392098404724926e-07, "loss": 0.5045, "step": 13084 }, { "epoch": 1.9, "grad_norm": 6.9854044914245605, "learning_rate": 6.390622232160549e-07, "loss": 0.4918, "step": 13085 }, { "epoch": 1.9, "grad_norm": 6.660172939300537, "learning_rate": 6.389146150023266e-07, "loss": 0.5351, "step": 13086 }, { "epoch": 1.9, "grad_norm": 6.390442848205566, "learning_rate": 6.387670158350062e-07, "loss": 0.5313, "step": 13087 }, { "epoch": 1.9, "grad_norm": 7.170241355895996, "learning_rate": 6.386194257177908e-07, "loss": 0.5464, "step": 13088 }, { "epoch": 1.9, "grad_norm": 6.674422740936279, "learning_rate": 6.38471844654379e-07, "loss": 0.5325, "step": 13089 }, { "epoch": 1.9, "grad_norm": 6.638798236846924, "learning_rate": 6.383242726484674e-07, "loss": 0.4967, "step": 13090 }, { "epoch": 1.9, "grad_norm": 6.498988628387451, "learning_rate": 6.381767097037536e-07, "loss": 0.574, "step": 13091 }, { "epoch": 1.9, "grad_norm": 6.865296840667725, "learning_rate": 6.380291558239343e-07, "loss": 0.5453, "step": 13092 }, { "epoch": 1.9, "grad_norm": 5.781621932983398, "learning_rate": 6.378816110127067e-07, "loss": 0.486, "step": 13093 }, { "epoch": 1.9, "grad_norm": 6.389809608459473, "learning_rate": 6.377340752737665e-07, "loss": 0.4603, "step": 13094 }, { "epoch": 1.9, "grad_norm": 6.818089962005615, "learning_rate": 6.37586548610811e-07, "loss": 0.5125, "step": 13095 }, { "epoch": 1.9, "grad_norm": 7.169272422790527, "learning_rate": 6.374390310275351e-07, "loss": 0.5506, "step": 13096 }, { "epoch": 1.9, "grad_norm": 6.146816253662109, "learning_rate": 6.372915225276355e-07, "loss": 0.452, "step": 13097 }, { "epoch": 1.9, "grad_norm": 6.5132622718811035, "learning_rate": 6.371440231148074e-07, "loss": 0.5229, "step": 13098 }, { "epoch": 1.9, "grad_norm": 7.066079139709473, "learning_rate": 6.369965327927463e-07, "loss": 0.5357, "step": 13099 }, { "epoch": 1.9, "grad_norm": 6.294097900390625, "learning_rate": 6.36849051565147e-07, "loss": 0.5247, "step": 13100 }, { "epoch": 1.9, "grad_norm": 6.051542282104492, "learning_rate": 6.36701579435705e-07, "loss": 0.5162, "step": 13101 }, { "epoch": 1.9, "grad_norm": 6.388638019561768, "learning_rate": 6.365541164081142e-07, "loss": 0.4956, "step": 13102 }, { "epoch": 1.9, "grad_norm": 7.060705661773682, "learning_rate": 6.364066624860696e-07, "loss": 0.4994, "step": 13103 }, { "epoch": 1.9, "grad_norm": 6.146876811981201, "learning_rate": 6.362592176732654e-07, "loss": 0.4681, "step": 13104 }, { "epoch": 1.9, "grad_norm": 6.670332908630371, "learning_rate": 6.361117819733954e-07, "loss": 0.5302, "step": 13105 }, { "epoch": 1.9, "grad_norm": 7.470135688781738, "learning_rate": 6.359643553901538e-07, "loss": 0.5556, "step": 13106 }, { "epoch": 1.9, "grad_norm": 7.087244987487793, "learning_rate": 6.358169379272334e-07, "loss": 0.543, "step": 13107 }, { "epoch": 1.9, "grad_norm": 7.1883955001831055, "learning_rate": 6.356695295883281e-07, "loss": 0.5349, "step": 13108 }, { "epoch": 1.9, "grad_norm": 7.99462890625, "learning_rate": 6.355221303771303e-07, "loss": 0.5769, "step": 13109 }, { "epoch": 1.9, "grad_norm": 6.878045082092285, "learning_rate": 6.353747402973339e-07, "loss": 0.513, "step": 13110 }, { "epoch": 1.9, "grad_norm": 6.716449737548828, "learning_rate": 6.352273593526307e-07, "loss": 0.504, "step": 13111 }, { "epoch": 1.9, "grad_norm": 5.990809440612793, "learning_rate": 6.350799875467134e-07, "loss": 0.5567, "step": 13112 }, { "epoch": 1.9, "grad_norm": 7.758854866027832, "learning_rate": 6.349326248832738e-07, "loss": 0.6077, "step": 13113 }, { "epoch": 1.9, "grad_norm": 6.058319091796875, "learning_rate": 6.347852713660046e-07, "loss": 0.4399, "step": 13114 }, { "epoch": 1.9, "grad_norm": 6.130281448364258, "learning_rate": 6.346379269985964e-07, "loss": 0.467, "step": 13115 }, { "epoch": 1.9, "grad_norm": 6.978331565856934, "learning_rate": 6.344905917847418e-07, "loss": 0.5279, "step": 13116 }, { "epoch": 1.9, "grad_norm": 5.710531234741211, "learning_rate": 6.343432657281312e-07, "loss": 0.4835, "step": 13117 }, { "epoch": 1.9, "grad_norm": 6.803352355957031, "learning_rate": 6.341959488324562e-07, "loss": 0.5403, "step": 13118 }, { "epoch": 1.9, "grad_norm": 6.873170852661133, "learning_rate": 6.340486411014072e-07, "loss": 0.5165, "step": 13119 }, { "epoch": 1.9, "grad_norm": 6.662430286407471, "learning_rate": 6.339013425386751e-07, "loss": 0.5365, "step": 13120 }, { "epoch": 1.9, "grad_norm": 6.4803547859191895, "learning_rate": 6.337540531479496e-07, "loss": 0.5032, "step": 13121 }, { "epoch": 1.9, "grad_norm": 5.976278305053711, "learning_rate": 6.336067729329216e-07, "loss": 0.4879, "step": 13122 }, { "epoch": 1.9, "grad_norm": 7.023187160491943, "learning_rate": 6.334595018972801e-07, "loss": 0.5092, "step": 13123 }, { "epoch": 1.9, "grad_norm": 6.462897300720215, "learning_rate": 6.333122400447158e-07, "loss": 0.567, "step": 13124 }, { "epoch": 1.9, "grad_norm": 6.666470050811768, "learning_rate": 6.331649873789171e-07, "loss": 0.4326, "step": 13125 }, { "epoch": 1.9, "grad_norm": 5.987504482269287, "learning_rate": 6.330177439035739e-07, "loss": 0.4909, "step": 13126 }, { "epoch": 1.9, "grad_norm": 5.842765808105469, "learning_rate": 6.328705096223747e-07, "loss": 0.4487, "step": 13127 }, { "epoch": 1.9, "grad_norm": 6.644289493560791, "learning_rate": 6.327232845390084e-07, "loss": 0.4847, "step": 13128 }, { "epoch": 1.9, "grad_norm": 6.2995476722717285, "learning_rate": 6.325760686571633e-07, "loss": 0.5186, "step": 13129 }, { "epoch": 1.91, "grad_norm": 6.163344383239746, "learning_rate": 6.324288619805276e-07, "loss": 0.5272, "step": 13130 }, { "epoch": 1.91, "grad_norm": 6.11451530456543, "learning_rate": 6.3228166451279e-07, "loss": 0.5367, "step": 13131 }, { "epoch": 1.91, "grad_norm": 6.918127536773682, "learning_rate": 6.321344762576375e-07, "loss": 0.6283, "step": 13132 }, { "epoch": 1.91, "grad_norm": 6.989589691162109, "learning_rate": 6.319872972187584e-07, "loss": 0.5338, "step": 13133 }, { "epoch": 1.91, "grad_norm": 7.000489234924316, "learning_rate": 6.318401273998391e-07, "loss": 0.5273, "step": 13134 }, { "epoch": 1.91, "grad_norm": 6.691206455230713, "learning_rate": 6.316929668045676e-07, "loss": 0.5392, "step": 13135 }, { "epoch": 1.91, "grad_norm": 6.602540016174316, "learning_rate": 6.3154581543663e-07, "loss": 0.5671, "step": 13136 }, { "epoch": 1.91, "grad_norm": 6.933900833129883, "learning_rate": 6.313986732997137e-07, "loss": 0.4951, "step": 13137 }, { "epoch": 1.91, "grad_norm": 6.348569869995117, "learning_rate": 6.312515403975043e-07, "loss": 0.5023, "step": 13138 }, { "epoch": 1.91, "grad_norm": 5.884683132171631, "learning_rate": 6.311044167336887e-07, "loss": 0.4763, "step": 13139 }, { "epoch": 1.91, "grad_norm": 6.8783674240112305, "learning_rate": 6.309573023119526e-07, "loss": 0.5051, "step": 13140 }, { "epoch": 1.91, "grad_norm": 6.297981262207031, "learning_rate": 6.308101971359817e-07, "loss": 0.502, "step": 13141 }, { "epoch": 1.91, "grad_norm": 6.488484859466553, "learning_rate": 6.306631012094611e-07, "loss": 0.576, "step": 13142 }, { "epoch": 1.91, "grad_norm": 6.560131072998047, "learning_rate": 6.305160145360769e-07, "loss": 0.5254, "step": 13143 }, { "epoch": 1.91, "grad_norm": 7.15456485748291, "learning_rate": 6.303689371195129e-07, "loss": 0.5118, "step": 13144 }, { "epoch": 1.91, "grad_norm": 5.8679046630859375, "learning_rate": 6.302218689634553e-07, "loss": 0.4825, "step": 13145 }, { "epoch": 1.91, "grad_norm": 6.1383867263793945, "learning_rate": 6.300748100715876e-07, "loss": 0.5444, "step": 13146 }, { "epoch": 1.91, "grad_norm": 5.925792217254639, "learning_rate": 6.299277604475944e-07, "loss": 0.4906, "step": 13147 }, { "epoch": 1.91, "grad_norm": 6.757519245147705, "learning_rate": 6.2978072009516e-07, "loss": 0.5818, "step": 13148 }, { "epoch": 1.91, "grad_norm": 6.234529972076416, "learning_rate": 6.296336890179683e-07, "loss": 0.5556, "step": 13149 }, { "epoch": 1.91, "grad_norm": 5.813828945159912, "learning_rate": 6.294866672197022e-07, "loss": 0.4804, "step": 13150 }, { "epoch": 1.91, "grad_norm": 6.466607570648193, "learning_rate": 6.293396547040462e-07, "loss": 0.5936, "step": 13151 }, { "epoch": 1.91, "grad_norm": 6.151504039764404, "learning_rate": 6.291926514746824e-07, "loss": 0.5203, "step": 13152 }, { "epoch": 1.91, "grad_norm": 7.606942653656006, "learning_rate": 6.290456575352947e-07, "loss": 0.4922, "step": 13153 }, { "epoch": 1.91, "grad_norm": 7.252243995666504, "learning_rate": 6.28898672889565e-07, "loss": 0.5974, "step": 13154 }, { "epoch": 1.91, "grad_norm": 6.027740478515625, "learning_rate": 6.287516975411759e-07, "loss": 0.519, "step": 13155 }, { "epoch": 1.91, "grad_norm": 5.587104320526123, "learning_rate": 6.286047314938102e-07, "loss": 0.4416, "step": 13156 }, { "epoch": 1.91, "grad_norm": 6.766073703765869, "learning_rate": 6.284577747511492e-07, "loss": 0.4436, "step": 13157 }, { "epoch": 1.91, "grad_norm": 5.911036014556885, "learning_rate": 6.283108273168752e-07, "loss": 0.4486, "step": 13158 }, { "epoch": 1.91, "grad_norm": 6.173581123352051, "learning_rate": 6.281638891946694e-07, "loss": 0.5229, "step": 13159 }, { "epoch": 1.91, "grad_norm": 6.160005569458008, "learning_rate": 6.280169603882133e-07, "loss": 0.5201, "step": 13160 }, { "epoch": 1.91, "grad_norm": 6.261269569396973, "learning_rate": 6.278700409011877e-07, "loss": 0.5314, "step": 13161 }, { "epoch": 1.91, "grad_norm": 7.001314163208008, "learning_rate": 6.277231307372737e-07, "loss": 0.5768, "step": 13162 }, { "epoch": 1.91, "grad_norm": 7.170543193817139, "learning_rate": 6.275762299001514e-07, "loss": 0.5154, "step": 13163 }, { "epoch": 1.91, "grad_norm": 6.508970260620117, "learning_rate": 6.274293383935021e-07, "loss": 0.4969, "step": 13164 }, { "epoch": 1.91, "grad_norm": 7.1825971603393555, "learning_rate": 6.272824562210049e-07, "loss": 0.4994, "step": 13165 }, { "epoch": 1.91, "grad_norm": 6.993120193481445, "learning_rate": 6.271355833863405e-07, "loss": 0.5042, "step": 13166 }, { "epoch": 1.91, "grad_norm": 6.412059783935547, "learning_rate": 6.26988719893188e-07, "loss": 0.5459, "step": 13167 }, { "epoch": 1.91, "grad_norm": 7.4438910484313965, "learning_rate": 6.268418657452272e-07, "loss": 0.6327, "step": 13168 }, { "epoch": 1.91, "grad_norm": 6.784033298492432, "learning_rate": 6.266950209461369e-07, "loss": 0.5714, "step": 13169 }, { "epoch": 1.91, "grad_norm": 5.983336925506592, "learning_rate": 6.265481854995967e-07, "loss": 0.5327, "step": 13170 }, { "epoch": 1.91, "grad_norm": 7.151019096374512, "learning_rate": 6.264013594092843e-07, "loss": 0.5837, "step": 13171 }, { "epoch": 1.91, "grad_norm": 6.261846542358398, "learning_rate": 6.262545426788794e-07, "loss": 0.4979, "step": 13172 }, { "epoch": 1.91, "grad_norm": 6.7271409034729, "learning_rate": 6.261077353120591e-07, "loss": 0.509, "step": 13173 }, { "epoch": 1.91, "grad_norm": 6.422139644622803, "learning_rate": 6.259609373125025e-07, "loss": 0.5016, "step": 13174 }, { "epoch": 1.91, "grad_norm": 7.925694942474365, "learning_rate": 6.258141486838866e-07, "loss": 0.6096, "step": 13175 }, { "epoch": 1.91, "grad_norm": 6.543057441711426, "learning_rate": 6.256673694298896e-07, "loss": 0.5373, "step": 13176 }, { "epoch": 1.91, "grad_norm": 6.818686485290527, "learning_rate": 6.255205995541879e-07, "loss": 0.4999, "step": 13177 }, { "epoch": 1.91, "grad_norm": 6.260610103607178, "learning_rate": 6.253738390604595e-07, "loss": 0.5793, "step": 13178 }, { "epoch": 1.91, "grad_norm": 6.436194896697998, "learning_rate": 6.252270879523804e-07, "loss": 0.6021, "step": 13179 }, { "epoch": 1.91, "grad_norm": 6.337731838226318, "learning_rate": 6.25080346233628e-07, "loss": 0.5449, "step": 13180 }, { "epoch": 1.91, "grad_norm": 6.423578262329102, "learning_rate": 6.249336139078785e-07, "loss": 0.4616, "step": 13181 }, { "epoch": 1.91, "grad_norm": 6.892806529998779, "learning_rate": 6.247868909788074e-07, "loss": 0.5958, "step": 13182 }, { "epoch": 1.91, "grad_norm": 7.492220878601074, "learning_rate": 6.246401774500916e-07, "loss": 0.562, "step": 13183 }, { "epoch": 1.91, "grad_norm": 6.081180572509766, "learning_rate": 6.244934733254057e-07, "loss": 0.5191, "step": 13184 }, { "epoch": 1.91, "grad_norm": 7.938570976257324, "learning_rate": 6.243467786084263e-07, "loss": 0.6084, "step": 13185 }, { "epoch": 1.91, "grad_norm": 6.433441162109375, "learning_rate": 6.242000933028274e-07, "loss": 0.5079, "step": 13186 }, { "epoch": 1.91, "grad_norm": 6.372265815734863, "learning_rate": 6.240534174122851e-07, "loss": 0.4843, "step": 13187 }, { "epoch": 1.91, "grad_norm": 6.212697982788086, "learning_rate": 6.239067509404732e-07, "loss": 0.4705, "step": 13188 }, { "epoch": 1.91, "grad_norm": 6.480527400970459, "learning_rate": 6.23760093891067e-07, "loss": 0.5585, "step": 13189 }, { "epoch": 1.91, "grad_norm": 6.458919048309326, "learning_rate": 6.236134462677398e-07, "loss": 0.47, "step": 13190 }, { "epoch": 1.91, "grad_norm": 6.708348751068115, "learning_rate": 6.234668080741665e-07, "loss": 0.4705, "step": 13191 }, { "epoch": 1.91, "grad_norm": 7.4119977951049805, "learning_rate": 6.233201793140202e-07, "loss": 0.6, "step": 13192 }, { "epoch": 1.91, "grad_norm": 6.035830020904541, "learning_rate": 6.231735599909751e-07, "loss": 0.5457, "step": 13193 }, { "epoch": 1.91, "grad_norm": 6.572271347045898, "learning_rate": 6.230269501087038e-07, "loss": 0.512, "step": 13194 }, { "epoch": 1.91, "grad_norm": 6.934373378753662, "learning_rate": 6.228803496708802e-07, "loss": 0.5733, "step": 13195 }, { "epoch": 1.91, "grad_norm": 6.1492390632629395, "learning_rate": 6.227337586811765e-07, "loss": 0.478, "step": 13196 }, { "epoch": 1.91, "grad_norm": 6.278576850891113, "learning_rate": 6.225871771432656e-07, "loss": 0.5207, "step": 13197 }, { "epoch": 1.91, "grad_norm": 6.815771579742432, "learning_rate": 6.224406050608193e-07, "loss": 0.484, "step": 13198 }, { "epoch": 1.92, "grad_norm": 6.065677165985107, "learning_rate": 6.222940424375107e-07, "loss": 0.5313, "step": 13199 }, { "epoch": 1.92, "grad_norm": 7.357818603515625, "learning_rate": 6.221474892770106e-07, "loss": 0.4911, "step": 13200 }, { "epoch": 1.92, "grad_norm": 6.574626445770264, "learning_rate": 6.220009455829918e-07, "loss": 0.646, "step": 13201 }, { "epoch": 1.92, "grad_norm": 6.975106716156006, "learning_rate": 6.218544113591249e-07, "loss": 0.5232, "step": 13202 }, { "epoch": 1.92, "grad_norm": 6.878812313079834, "learning_rate": 6.217078866090814e-07, "loss": 0.5826, "step": 13203 }, { "epoch": 1.92, "grad_norm": 7.860546588897705, "learning_rate": 6.215613713365318e-07, "loss": 0.544, "step": 13204 }, { "epoch": 1.92, "grad_norm": 6.599250316619873, "learning_rate": 6.214148655451475e-07, "loss": 0.5035, "step": 13205 }, { "epoch": 1.92, "grad_norm": 6.322579860687256, "learning_rate": 6.212683692385983e-07, "loss": 0.532, "step": 13206 }, { "epoch": 1.92, "grad_norm": 7.165267467498779, "learning_rate": 6.211218824205547e-07, "loss": 0.5733, "step": 13207 }, { "epoch": 1.92, "grad_norm": 6.6780781745910645, "learning_rate": 6.20975405094687e-07, "loss": 0.4816, "step": 13208 }, { "epoch": 1.92, "grad_norm": 6.622347354888916, "learning_rate": 6.208289372646645e-07, "loss": 0.5086, "step": 13209 }, { "epoch": 1.92, "grad_norm": 5.576314926147461, "learning_rate": 6.206824789341572e-07, "loss": 0.4896, "step": 13210 }, { "epoch": 1.92, "grad_norm": 7.570354461669922, "learning_rate": 6.205360301068337e-07, "loss": 0.5666, "step": 13211 }, { "epoch": 1.92, "grad_norm": 6.288271427154541, "learning_rate": 6.203895907863636e-07, "loss": 0.4825, "step": 13212 }, { "epoch": 1.92, "grad_norm": 6.982413291931152, "learning_rate": 6.202431609764153e-07, "loss": 0.5607, "step": 13213 }, { "epoch": 1.92, "grad_norm": 6.495136260986328, "learning_rate": 6.200967406806579e-07, "loss": 0.5524, "step": 13214 }, { "epoch": 1.92, "grad_norm": 6.542544364929199, "learning_rate": 6.19950329902759e-07, "loss": 0.5023, "step": 13215 }, { "epoch": 1.92, "grad_norm": 7.272396564483643, "learning_rate": 6.198039286463873e-07, "loss": 0.5989, "step": 13216 }, { "epoch": 1.92, "grad_norm": 6.180851459503174, "learning_rate": 6.196575369152104e-07, "loss": 0.5461, "step": 13217 }, { "epoch": 1.92, "grad_norm": 7.751382350921631, "learning_rate": 6.19511154712896e-07, "loss": 0.5415, "step": 13218 }, { "epoch": 1.92, "grad_norm": 6.480297565460205, "learning_rate": 6.193647820431111e-07, "loss": 0.6038, "step": 13219 }, { "epoch": 1.92, "grad_norm": 7.0936102867126465, "learning_rate": 6.192184189095236e-07, "loss": 0.5517, "step": 13220 }, { "epoch": 1.92, "grad_norm": 7.05691385269165, "learning_rate": 6.190720653157992e-07, "loss": 0.517, "step": 13221 }, { "epoch": 1.92, "grad_norm": 6.539671897888184, "learning_rate": 6.18925721265606e-07, "loss": 0.5699, "step": 13222 }, { "epoch": 1.92, "grad_norm": 6.153030872344971, "learning_rate": 6.187793867626093e-07, "loss": 0.5063, "step": 13223 }, { "epoch": 1.92, "grad_norm": 6.634727478027344, "learning_rate": 6.186330618104758e-07, "loss": 0.478, "step": 13224 }, { "epoch": 1.92, "grad_norm": 6.545719146728516, "learning_rate": 6.184867464128709e-07, "loss": 0.524, "step": 13225 }, { "epoch": 1.92, "grad_norm": 6.717161178588867, "learning_rate": 6.183404405734611e-07, "loss": 0.4908, "step": 13226 }, { "epoch": 1.92, "grad_norm": 7.269778728485107, "learning_rate": 6.181941442959111e-07, "loss": 0.5969, "step": 13227 }, { "epoch": 1.92, "grad_norm": 6.673353672027588, "learning_rate": 6.180478575838868e-07, "loss": 0.5397, "step": 13228 }, { "epoch": 1.92, "grad_norm": 6.641757011413574, "learning_rate": 6.179015804410524e-07, "loss": 0.5874, "step": 13229 }, { "epoch": 1.92, "grad_norm": 6.662599563598633, "learning_rate": 6.177553128710735e-07, "loss": 0.5136, "step": 13230 }, { "epoch": 1.92, "grad_norm": 7.295899868011475, "learning_rate": 6.17609054877614e-07, "loss": 0.6012, "step": 13231 }, { "epoch": 1.92, "grad_norm": 7.234461307525635, "learning_rate": 6.174628064643384e-07, "loss": 0.5331, "step": 13232 }, { "epoch": 1.92, "grad_norm": 6.559841632843018, "learning_rate": 6.173165676349102e-07, "loss": 0.5348, "step": 13233 }, { "epoch": 1.92, "grad_norm": 6.326387405395508, "learning_rate": 6.171703383929938e-07, "loss": 0.5337, "step": 13234 }, { "epoch": 1.92, "grad_norm": 6.408576488494873, "learning_rate": 6.170241187422527e-07, "loss": 0.5062, "step": 13235 }, { "epoch": 1.92, "grad_norm": 7.1012959480285645, "learning_rate": 6.1687790868635e-07, "loss": 0.4826, "step": 13236 }, { "epoch": 1.92, "grad_norm": 7.29426908493042, "learning_rate": 6.16731708228949e-07, "loss": 0.5128, "step": 13237 }, { "epoch": 1.92, "grad_norm": 5.981572151184082, "learning_rate": 6.165855173737118e-07, "loss": 0.5193, "step": 13238 }, { "epoch": 1.92, "grad_norm": 6.380425453186035, "learning_rate": 6.164393361243019e-07, "loss": 0.4705, "step": 13239 }, { "epoch": 1.92, "grad_norm": 7.085021018981934, "learning_rate": 6.162931644843808e-07, "loss": 0.589, "step": 13240 }, { "epoch": 1.92, "grad_norm": 5.671924591064453, "learning_rate": 6.161470024576114e-07, "loss": 0.4751, "step": 13241 }, { "epoch": 1.92, "grad_norm": 6.503054141998291, "learning_rate": 6.160008500476548e-07, "loss": 0.4754, "step": 13242 }, { "epoch": 1.92, "grad_norm": 6.258585453033447, "learning_rate": 6.158547072581734e-07, "loss": 0.5128, "step": 13243 }, { "epoch": 1.92, "grad_norm": 7.033829689025879, "learning_rate": 6.157085740928278e-07, "loss": 0.571, "step": 13244 }, { "epoch": 1.92, "grad_norm": 6.4283294677734375, "learning_rate": 6.155624505552799e-07, "loss": 0.4814, "step": 13245 }, { "epoch": 1.92, "grad_norm": 6.770418167114258, "learning_rate": 6.154163366491896e-07, "loss": 0.5908, "step": 13246 }, { "epoch": 1.92, "grad_norm": 6.255176067352295, "learning_rate": 6.152702323782185e-07, "loss": 0.56, "step": 13247 }, { "epoch": 1.92, "grad_norm": 6.8846635818481445, "learning_rate": 6.151241377460262e-07, "loss": 0.5821, "step": 13248 }, { "epoch": 1.92, "grad_norm": 6.424644470214844, "learning_rate": 6.149780527562738e-07, "loss": 0.4665, "step": 13249 }, { "epoch": 1.92, "grad_norm": 6.3606791496276855, "learning_rate": 6.148319774126204e-07, "loss": 0.4879, "step": 13250 }, { "epoch": 1.92, "grad_norm": 6.697055816650391, "learning_rate": 6.146859117187259e-07, "loss": 0.6418, "step": 13251 }, { "epoch": 1.92, "grad_norm": 5.58647346496582, "learning_rate": 6.145398556782499e-07, "loss": 0.4302, "step": 13252 }, { "epoch": 1.92, "grad_norm": 6.369053840637207, "learning_rate": 6.143938092948517e-07, "loss": 0.4997, "step": 13253 }, { "epoch": 1.92, "grad_norm": 7.6166768074035645, "learning_rate": 6.142477725721898e-07, "loss": 0.6792, "step": 13254 }, { "epoch": 1.92, "grad_norm": 6.605097770690918, "learning_rate": 6.141017455139232e-07, "loss": 0.5738, "step": 13255 }, { "epoch": 1.92, "grad_norm": 6.791297912597656, "learning_rate": 6.139557281237103e-07, "loss": 0.508, "step": 13256 }, { "epoch": 1.92, "grad_norm": 5.93166971206665, "learning_rate": 6.138097204052097e-07, "loss": 0.4965, "step": 13257 }, { "epoch": 1.92, "grad_norm": 6.5429277420043945, "learning_rate": 6.136637223620789e-07, "loss": 0.5232, "step": 13258 }, { "epoch": 1.92, "grad_norm": 6.609084129333496, "learning_rate": 6.135177339979758e-07, "loss": 0.5055, "step": 13259 }, { "epoch": 1.92, "grad_norm": 6.357509613037109, "learning_rate": 6.133717553165576e-07, "loss": 0.5512, "step": 13260 }, { "epoch": 1.92, "grad_norm": 7.435555458068848, "learning_rate": 6.132257863214819e-07, "loss": 0.5391, "step": 13261 }, { "epoch": 1.92, "grad_norm": 6.042349338531494, "learning_rate": 6.130798270164062e-07, "loss": 0.4729, "step": 13262 }, { "epoch": 1.92, "grad_norm": 7.601889610290527, "learning_rate": 6.129338774049861e-07, "loss": 0.5627, "step": 13263 }, { "epoch": 1.92, "grad_norm": 6.235323905944824, "learning_rate": 6.127879374908794e-07, "loss": 0.488, "step": 13264 }, { "epoch": 1.92, "grad_norm": 6.643988609313965, "learning_rate": 6.126420072777413e-07, "loss": 0.5536, "step": 13265 }, { "epoch": 1.92, "grad_norm": 6.332042694091797, "learning_rate": 6.124960867692288e-07, "loss": 0.4425, "step": 13266 }, { "epoch": 1.92, "grad_norm": 6.732607364654541, "learning_rate": 6.123501759689969e-07, "loss": 0.5362, "step": 13267 }, { "epoch": 1.93, "grad_norm": 6.320659637451172, "learning_rate": 6.122042748807017e-07, "loss": 0.5016, "step": 13268 }, { "epoch": 1.93, "grad_norm": 6.059717655181885, "learning_rate": 6.120583835079978e-07, "loss": 0.5176, "step": 13269 }, { "epoch": 1.93, "grad_norm": 7.450612545013428, "learning_rate": 6.119125018545412e-07, "loss": 0.4461, "step": 13270 }, { "epoch": 1.93, "grad_norm": 5.849841594696045, "learning_rate": 6.117666299239863e-07, "loss": 0.5175, "step": 13271 }, { "epoch": 1.93, "grad_norm": 6.158318519592285, "learning_rate": 6.116207677199876e-07, "loss": 0.5211, "step": 13272 }, { "epoch": 1.93, "grad_norm": 7.26370906829834, "learning_rate": 6.114749152461993e-07, "loss": 0.5477, "step": 13273 }, { "epoch": 1.93, "grad_norm": 6.134305953979492, "learning_rate": 6.113290725062761e-07, "loss": 0.5692, "step": 13274 }, { "epoch": 1.93, "grad_norm": 6.070098876953125, "learning_rate": 6.111832395038709e-07, "loss": 0.4563, "step": 13275 }, { "epoch": 1.93, "grad_norm": 6.616950988769531, "learning_rate": 6.110374162426384e-07, "loss": 0.5706, "step": 13276 }, { "epoch": 1.93, "grad_norm": 6.396412372589111, "learning_rate": 6.108916027262311e-07, "loss": 0.5073, "step": 13277 }, { "epoch": 1.93, "grad_norm": 6.598964214324951, "learning_rate": 6.107457989583027e-07, "loss": 0.512, "step": 13278 }, { "epoch": 1.93, "grad_norm": 6.597873210906982, "learning_rate": 6.106000049425058e-07, "loss": 0.5697, "step": 13279 }, { "epoch": 1.93, "grad_norm": 6.680655002593994, "learning_rate": 6.104542206824932e-07, "loss": 0.5808, "step": 13280 }, { "epoch": 1.93, "grad_norm": 6.622256755828857, "learning_rate": 6.103084461819168e-07, "loss": 0.4664, "step": 13281 }, { "epoch": 1.93, "grad_norm": 6.996532917022705, "learning_rate": 6.101626814444294e-07, "loss": 0.5502, "step": 13282 }, { "epoch": 1.93, "grad_norm": 8.165322303771973, "learning_rate": 6.100169264736822e-07, "loss": 0.5339, "step": 13283 }, { "epoch": 1.93, "grad_norm": 6.380097389221191, "learning_rate": 6.098711812733277e-07, "loss": 0.5631, "step": 13284 }, { "epoch": 1.93, "grad_norm": 7.899885177612305, "learning_rate": 6.097254458470165e-07, "loss": 0.5337, "step": 13285 }, { "epoch": 1.93, "grad_norm": 6.4567131996154785, "learning_rate": 6.095797201984002e-07, "loss": 0.4675, "step": 13286 }, { "epoch": 1.93, "grad_norm": 7.074371814727783, "learning_rate": 6.094340043311299e-07, "loss": 0.475, "step": 13287 }, { "epoch": 1.93, "grad_norm": 6.576861381530762, "learning_rate": 6.092882982488557e-07, "loss": 0.5391, "step": 13288 }, { "epoch": 1.93, "grad_norm": 6.135781764984131, "learning_rate": 6.091426019552286e-07, "loss": 0.5152, "step": 13289 }, { "epoch": 1.93, "grad_norm": 5.869462490081787, "learning_rate": 6.089969154538983e-07, "loss": 0.5052, "step": 13290 }, { "epoch": 1.93, "grad_norm": 6.112564563751221, "learning_rate": 6.088512387485153e-07, "loss": 0.5281, "step": 13291 }, { "epoch": 1.93, "grad_norm": 5.811256408691406, "learning_rate": 6.087055718427287e-07, "loss": 0.5361, "step": 13292 }, { "epoch": 1.93, "grad_norm": 6.109641075134277, "learning_rate": 6.085599147401886e-07, "loss": 0.517, "step": 13293 }, { "epoch": 1.93, "grad_norm": 6.570080757141113, "learning_rate": 6.084142674445432e-07, "loss": 0.5063, "step": 13294 }, { "epoch": 1.93, "grad_norm": 7.327901363372803, "learning_rate": 6.082686299594425e-07, "loss": 0.5345, "step": 13295 }, { "epoch": 1.93, "grad_norm": 6.382884502410889, "learning_rate": 6.081230022885344e-07, "loss": 0.5395, "step": 13296 }, { "epoch": 1.93, "grad_norm": 5.9635114669799805, "learning_rate": 6.079773844354682e-07, "loss": 0.5172, "step": 13297 }, { "epoch": 1.93, "grad_norm": 6.629202365875244, "learning_rate": 6.078317764038912e-07, "loss": 0.5727, "step": 13298 }, { "epoch": 1.93, "grad_norm": 7.338027477264404, "learning_rate": 6.076861781974522e-07, "loss": 0.6038, "step": 13299 }, { "epoch": 1.93, "grad_norm": 7.765679836273193, "learning_rate": 6.075405898197985e-07, "loss": 0.5782, "step": 13300 }, { "epoch": 1.93, "grad_norm": 6.565989017486572, "learning_rate": 6.073950112745778e-07, "loss": 0.5156, "step": 13301 }, { "epoch": 1.93, "grad_norm": 6.207776069641113, "learning_rate": 6.072494425654367e-07, "loss": 0.4906, "step": 13302 }, { "epoch": 1.93, "grad_norm": 6.211877822875977, "learning_rate": 6.071038836960232e-07, "loss": 0.4624, "step": 13303 }, { "epoch": 1.93, "grad_norm": 5.894685745239258, "learning_rate": 6.069583346699831e-07, "loss": 0.4987, "step": 13304 }, { "epoch": 1.93, "grad_norm": 7.749598503112793, "learning_rate": 6.068127954909636e-07, "loss": 0.57, "step": 13305 }, { "epoch": 1.93, "grad_norm": 7.018560409545898, "learning_rate": 6.066672661626104e-07, "loss": 0.5242, "step": 13306 }, { "epoch": 1.93, "grad_norm": 6.720038890838623, "learning_rate": 6.065217466885699e-07, "loss": 0.4584, "step": 13307 }, { "epoch": 1.93, "grad_norm": 6.814540386199951, "learning_rate": 6.063762370724875e-07, "loss": 0.4974, "step": 13308 }, { "epoch": 1.93, "grad_norm": 6.570746898651123, "learning_rate": 6.062307373180093e-07, "loss": 0.4358, "step": 13309 }, { "epoch": 1.93, "grad_norm": 6.1518940925598145, "learning_rate": 6.060852474287797e-07, "loss": 0.546, "step": 13310 }, { "epoch": 1.93, "grad_norm": 7.02524995803833, "learning_rate": 6.059397674084445e-07, "loss": 0.5249, "step": 13311 }, { "epoch": 1.93, "grad_norm": 5.867456436157227, "learning_rate": 6.057942972606478e-07, "loss": 0.4826, "step": 13312 }, { "epoch": 1.93, "grad_norm": 6.004745960235596, "learning_rate": 6.056488369890347e-07, "loss": 0.5488, "step": 13313 }, { "epoch": 1.93, "grad_norm": 5.6604485511779785, "learning_rate": 6.055033865972495e-07, "loss": 0.5038, "step": 13314 }, { "epoch": 1.93, "grad_norm": 6.828333377838135, "learning_rate": 6.053579460889355e-07, "loss": 0.511, "step": 13315 }, { "epoch": 1.93, "grad_norm": 6.547844886779785, "learning_rate": 6.052125154677372e-07, "loss": 0.5404, "step": 13316 }, { "epoch": 1.93, "grad_norm": 7.067550182342529, "learning_rate": 6.050670947372976e-07, "loss": 0.5758, "step": 13317 }, { "epoch": 1.93, "grad_norm": 6.9625563621521, "learning_rate": 6.049216839012606e-07, "loss": 0.6299, "step": 13318 }, { "epoch": 1.93, "grad_norm": 6.580892562866211, "learning_rate": 6.047762829632688e-07, "loss": 0.5233, "step": 13319 }, { "epoch": 1.93, "grad_norm": 6.132984638214111, "learning_rate": 6.046308919269649e-07, "loss": 0.4921, "step": 13320 }, { "epoch": 1.93, "grad_norm": 6.261226654052734, "learning_rate": 6.044855107959916e-07, "loss": 0.5646, "step": 13321 }, { "epoch": 1.93, "grad_norm": 7.0298261642456055, "learning_rate": 6.043401395739916e-07, "loss": 0.4384, "step": 13322 }, { "epoch": 1.93, "grad_norm": 6.367610931396484, "learning_rate": 6.041947782646059e-07, "loss": 0.5444, "step": 13323 }, { "epoch": 1.93, "grad_norm": 7.106124401092529, "learning_rate": 6.040494268714774e-07, "loss": 0.458, "step": 13324 }, { "epoch": 1.93, "grad_norm": 6.783115863800049, "learning_rate": 6.039040853982467e-07, "loss": 0.6341, "step": 13325 }, { "epoch": 1.93, "grad_norm": 6.547173500061035, "learning_rate": 6.03758753848556e-07, "loss": 0.5216, "step": 13326 }, { "epoch": 1.93, "grad_norm": 7.338864326477051, "learning_rate": 6.036134322260455e-07, "loss": 0.5916, "step": 13327 }, { "epoch": 1.93, "grad_norm": 6.746616840362549, "learning_rate": 6.034681205343569e-07, "loss": 0.47, "step": 13328 }, { "epoch": 1.93, "grad_norm": 7.539849281311035, "learning_rate": 6.033228187771297e-07, "loss": 0.6183, "step": 13329 }, { "epoch": 1.93, "grad_norm": 6.194199085235596, "learning_rate": 6.031775269580052e-07, "loss": 0.4974, "step": 13330 }, { "epoch": 1.93, "grad_norm": 5.955207824707031, "learning_rate": 6.030322450806226e-07, "loss": 0.4867, "step": 13331 }, { "epoch": 1.93, "grad_norm": 5.575291156768799, "learning_rate": 6.028869731486224e-07, "loss": 0.561, "step": 13332 }, { "epoch": 1.93, "grad_norm": 8.073665618896484, "learning_rate": 6.027417111656434e-07, "loss": 0.5474, "step": 13333 }, { "epoch": 1.93, "grad_norm": 6.121310234069824, "learning_rate": 6.025964591353258e-07, "loss": 0.5715, "step": 13334 }, { "epoch": 1.93, "grad_norm": 6.893208980560303, "learning_rate": 6.024512170613082e-07, "loss": 0.5202, "step": 13335 }, { "epoch": 1.93, "grad_norm": 6.4560041427612305, "learning_rate": 6.023059849472294e-07, "loss": 0.5503, "step": 13336 }, { "epoch": 1.94, "grad_norm": 6.446819305419922, "learning_rate": 6.021607627967277e-07, "loss": 0.5506, "step": 13337 }, { "epoch": 1.94, "grad_norm": 6.789878845214844, "learning_rate": 6.020155506134416e-07, "loss": 0.4821, "step": 13338 }, { "epoch": 1.94, "grad_norm": 6.865872383117676, "learning_rate": 6.018703484010096e-07, "loss": 0.5972, "step": 13339 }, { "epoch": 1.94, "grad_norm": 7.994133472442627, "learning_rate": 6.01725156163069e-07, "loss": 0.5109, "step": 13340 }, { "epoch": 1.94, "grad_norm": 7.244035243988037, "learning_rate": 6.015799739032576e-07, "loss": 0.5238, "step": 13341 }, { "epoch": 1.94, "grad_norm": 6.900454521179199, "learning_rate": 6.014348016252122e-07, "loss": 0.5017, "step": 13342 }, { "epoch": 1.94, "grad_norm": 6.2603278160095215, "learning_rate": 6.012896393325706e-07, "loss": 0.5286, "step": 13343 }, { "epoch": 1.94, "grad_norm": 7.157546520233154, "learning_rate": 6.011444870289689e-07, "loss": 0.6037, "step": 13344 }, { "epoch": 1.94, "grad_norm": 6.656713962554932, "learning_rate": 6.009993447180443e-07, "loss": 0.5142, "step": 13345 }, { "epoch": 1.94, "grad_norm": 6.233750820159912, "learning_rate": 6.008542124034325e-07, "loss": 0.5369, "step": 13346 }, { "epoch": 1.94, "grad_norm": 7.0003838539123535, "learning_rate": 6.007090900887704e-07, "loss": 0.5751, "step": 13347 }, { "epoch": 1.94, "grad_norm": 6.3704328536987305, "learning_rate": 6.005639777776929e-07, "loss": 0.5496, "step": 13348 }, { "epoch": 1.94, "grad_norm": 7.365314960479736, "learning_rate": 6.004188754738363e-07, "loss": 0.5615, "step": 13349 }, { "epoch": 1.94, "grad_norm": 6.899964809417725, "learning_rate": 6.002737831808349e-07, "loss": 0.5998, "step": 13350 }, { "epoch": 1.94, "grad_norm": 6.335249423980713, "learning_rate": 6.001287009023249e-07, "loss": 0.4911, "step": 13351 }, { "epoch": 1.94, "grad_norm": 6.553360939025879, "learning_rate": 5.999836286419402e-07, "loss": 0.526, "step": 13352 }, { "epoch": 1.94, "grad_norm": 7.383922100067139, "learning_rate": 5.998385664033161e-07, "loss": 0.5777, "step": 13353 }, { "epoch": 1.94, "grad_norm": 6.8238301277160645, "learning_rate": 5.996935141900861e-07, "loss": 0.5217, "step": 13354 }, { "epoch": 1.94, "grad_norm": 6.667707920074463, "learning_rate": 5.99548472005885e-07, "loss": 0.5787, "step": 13355 }, { "epoch": 1.94, "grad_norm": 6.191534996032715, "learning_rate": 5.994034398543461e-07, "loss": 0.5047, "step": 13356 }, { "epoch": 1.94, "grad_norm": 7.468454837799072, "learning_rate": 5.992584177391034e-07, "loss": 0.5479, "step": 13357 }, { "epoch": 1.94, "grad_norm": 6.458163738250732, "learning_rate": 5.991134056637896e-07, "loss": 0.5091, "step": 13358 }, { "epoch": 1.94, "grad_norm": 7.801877975463867, "learning_rate": 5.989684036320383e-07, "loss": 0.4878, "step": 13359 }, { "epoch": 1.94, "grad_norm": 6.012439250946045, "learning_rate": 5.988234116474816e-07, "loss": 0.5174, "step": 13360 }, { "epoch": 1.94, "grad_norm": 5.858245372772217, "learning_rate": 5.986784297137529e-07, "loss": 0.5013, "step": 13361 }, { "epoch": 1.94, "grad_norm": 7.197771072387695, "learning_rate": 5.985334578344839e-07, "loss": 0.5259, "step": 13362 }, { "epoch": 1.94, "grad_norm": 6.843574523925781, "learning_rate": 5.983884960133067e-07, "loss": 0.6069, "step": 13363 }, { "epoch": 1.94, "grad_norm": 6.337989330291748, "learning_rate": 5.982435442538534e-07, "loss": 0.5168, "step": 13364 }, { "epoch": 1.94, "grad_norm": 5.432689189910889, "learning_rate": 5.98098602559755e-07, "loss": 0.5313, "step": 13365 }, { "epoch": 1.94, "grad_norm": 6.982274055480957, "learning_rate": 5.979536709346436e-07, "loss": 0.4811, "step": 13366 }, { "epoch": 1.94, "grad_norm": 7.01130485534668, "learning_rate": 5.978087493821491e-07, "loss": 0.5192, "step": 13367 }, { "epoch": 1.94, "grad_norm": 6.411470413208008, "learning_rate": 5.976638379059033e-07, "loss": 0.5088, "step": 13368 }, { "epoch": 1.94, "grad_norm": 7.324818134307861, "learning_rate": 5.975189365095363e-07, "loss": 0.4812, "step": 13369 }, { "epoch": 1.94, "grad_norm": 6.9930419921875, "learning_rate": 5.973740451966785e-07, "loss": 0.5888, "step": 13370 }, { "epoch": 1.94, "grad_norm": 6.930942058563232, "learning_rate": 5.972291639709594e-07, "loss": 0.5295, "step": 13371 }, { "epoch": 1.94, "grad_norm": 6.775755882263184, "learning_rate": 5.970842928360096e-07, "loss": 0.5652, "step": 13372 }, { "epoch": 1.94, "grad_norm": 6.773956775665283, "learning_rate": 5.969394317954577e-07, "loss": 0.5276, "step": 13373 }, { "epoch": 1.94, "grad_norm": 9.003584861755371, "learning_rate": 5.967945808529338e-07, "loss": 0.6009, "step": 13374 }, { "epoch": 1.94, "grad_norm": 6.539613246917725, "learning_rate": 5.966497400120665e-07, "loss": 0.503, "step": 13375 }, { "epoch": 1.94, "grad_norm": 7.067044734954834, "learning_rate": 5.965049092764844e-07, "loss": 0.5294, "step": 13376 }, { "epoch": 1.94, "grad_norm": 6.5025634765625, "learning_rate": 5.963600886498162e-07, "loss": 0.4538, "step": 13377 }, { "epoch": 1.94, "grad_norm": 6.751709938049316, "learning_rate": 5.962152781356904e-07, "loss": 0.5381, "step": 13378 }, { "epoch": 1.94, "grad_norm": 6.485123634338379, "learning_rate": 5.960704777377343e-07, "loss": 0.5139, "step": 13379 }, { "epoch": 1.94, "grad_norm": 6.7169189453125, "learning_rate": 5.959256874595766e-07, "loss": 0.5466, "step": 13380 }, { "epoch": 1.94, "grad_norm": 7.086606502532959, "learning_rate": 5.957809073048436e-07, "loss": 0.5403, "step": 13381 }, { "epoch": 1.94, "grad_norm": 6.3980021476745605, "learning_rate": 5.956361372771639e-07, "loss": 0.4507, "step": 13382 }, { "epoch": 1.94, "grad_norm": 7.842268466949463, "learning_rate": 5.954913773801633e-07, "loss": 0.5936, "step": 13383 }, { "epoch": 1.94, "grad_norm": 6.991177558898926, "learning_rate": 5.953466276174692e-07, "loss": 0.6, "step": 13384 }, { "epoch": 1.94, "grad_norm": 6.670877933502197, "learning_rate": 5.952018879927077e-07, "loss": 0.5706, "step": 13385 }, { "epoch": 1.94, "grad_norm": 7.05784273147583, "learning_rate": 5.950571585095052e-07, "loss": 0.4961, "step": 13386 }, { "epoch": 1.94, "grad_norm": 6.727292060852051, "learning_rate": 5.949124391714875e-07, "loss": 0.5531, "step": 13387 }, { "epoch": 1.94, "grad_norm": 6.238815784454346, "learning_rate": 5.947677299822808e-07, "loss": 0.4734, "step": 13388 }, { "epoch": 1.94, "grad_norm": 6.601090908050537, "learning_rate": 5.946230309455097e-07, "loss": 0.5392, "step": 13389 }, { "epoch": 1.94, "grad_norm": 6.568423271179199, "learning_rate": 5.944783420648001e-07, "loss": 0.4312, "step": 13390 }, { "epoch": 1.94, "grad_norm": 6.712279319763184, "learning_rate": 5.94333663343777e-07, "loss": 0.481, "step": 13391 }, { "epoch": 1.94, "grad_norm": 6.4551591873168945, "learning_rate": 5.941889947860643e-07, "loss": 0.5457, "step": 13392 }, { "epoch": 1.94, "grad_norm": 6.475332736968994, "learning_rate": 5.940443363952877e-07, "loss": 0.5423, "step": 13393 }, { "epoch": 1.94, "grad_norm": 6.845039367675781, "learning_rate": 5.938996881750698e-07, "loss": 0.5268, "step": 13394 }, { "epoch": 1.94, "grad_norm": 6.069554328918457, "learning_rate": 5.937550501290361e-07, "loss": 0.4962, "step": 13395 }, { "epoch": 1.94, "grad_norm": 7.335206031799316, "learning_rate": 5.936104222608092e-07, "loss": 0.4482, "step": 13396 }, { "epoch": 1.94, "grad_norm": 8.109267234802246, "learning_rate": 5.934658045740131e-07, "loss": 0.5719, "step": 13397 }, { "epoch": 1.94, "grad_norm": 6.791089057922363, "learning_rate": 5.933211970722703e-07, "loss": 0.5793, "step": 13398 }, { "epoch": 1.94, "grad_norm": 7.009284496307373, "learning_rate": 5.931765997592044e-07, "loss": 0.6221, "step": 13399 }, { "epoch": 1.94, "grad_norm": 6.0958476066589355, "learning_rate": 5.930320126384375e-07, "loss": 0.4752, "step": 13400 }, { "epoch": 1.94, "grad_norm": 6.21443510055542, "learning_rate": 5.928874357135927e-07, "loss": 0.5053, "step": 13401 }, { "epoch": 1.94, "grad_norm": 6.615541458129883, "learning_rate": 5.927428689882912e-07, "loss": 0.5146, "step": 13402 }, { "epoch": 1.94, "grad_norm": 6.776311874389648, "learning_rate": 5.925983124661559e-07, "loss": 0.5232, "step": 13403 }, { "epoch": 1.94, "grad_norm": 6.4211273193359375, "learning_rate": 5.924537661508077e-07, "loss": 0.4676, "step": 13404 }, { "epoch": 1.95, "grad_norm": 6.86971378326416, "learning_rate": 5.923092300458683e-07, "loss": 0.4721, "step": 13405 }, { "epoch": 1.95, "grad_norm": 6.63060188293457, "learning_rate": 5.921647041549584e-07, "loss": 0.4965, "step": 13406 }, { "epoch": 1.95, "grad_norm": 6.429490089416504, "learning_rate": 5.920201884816996e-07, "loss": 0.473, "step": 13407 }, { "epoch": 1.95, "grad_norm": 7.476651191711426, "learning_rate": 5.918756830297116e-07, "loss": 0.6292, "step": 13408 }, { "epoch": 1.95, "grad_norm": 6.682290554046631, "learning_rate": 5.917311878026156e-07, "loss": 0.5233, "step": 13409 }, { "epoch": 1.95, "grad_norm": 6.5551557540893555, "learning_rate": 5.915867028040313e-07, "loss": 0.5038, "step": 13410 }, { "epoch": 1.95, "grad_norm": 6.551204681396484, "learning_rate": 5.914422280375784e-07, "loss": 0.5838, "step": 13411 }, { "epoch": 1.95, "grad_norm": 5.946234226226807, "learning_rate": 5.912977635068768e-07, "loss": 0.4633, "step": 13412 }, { "epoch": 1.95, "grad_norm": 6.23209285736084, "learning_rate": 5.911533092155457e-07, "loss": 0.5752, "step": 13413 }, { "epoch": 1.95, "grad_norm": 6.549989700317383, "learning_rate": 5.910088651672038e-07, "loss": 0.5659, "step": 13414 }, { "epoch": 1.95, "grad_norm": 6.189809322357178, "learning_rate": 5.908644313654708e-07, "loss": 0.4962, "step": 13415 }, { "epoch": 1.95, "grad_norm": 6.072056770324707, "learning_rate": 5.90720007813964e-07, "loss": 0.5114, "step": 13416 }, { "epoch": 1.95, "grad_norm": 6.294030666351318, "learning_rate": 5.905755945163027e-07, "loss": 0.5668, "step": 13417 }, { "epoch": 1.95, "grad_norm": 6.039261341094971, "learning_rate": 5.904311914761049e-07, "loss": 0.5205, "step": 13418 }, { "epoch": 1.95, "grad_norm": 6.913852214813232, "learning_rate": 5.902867986969877e-07, "loss": 0.468, "step": 13419 }, { "epoch": 1.95, "grad_norm": 6.453685283660889, "learning_rate": 5.901424161825695e-07, "loss": 0.4933, "step": 13420 }, { "epoch": 1.95, "grad_norm": 6.78980827331543, "learning_rate": 5.899980439364666e-07, "loss": 0.4821, "step": 13421 }, { "epoch": 1.95, "grad_norm": 6.5730390548706055, "learning_rate": 5.898536819622969e-07, "loss": 0.5531, "step": 13422 }, { "epoch": 1.95, "grad_norm": 6.387603282928467, "learning_rate": 5.897093302636764e-07, "loss": 0.5047, "step": 13423 }, { "epoch": 1.95, "grad_norm": 7.669668674468994, "learning_rate": 5.895649888442225e-07, "loss": 0.6136, "step": 13424 }, { "epoch": 1.95, "grad_norm": 6.707252025604248, "learning_rate": 5.894206577075506e-07, "loss": 0.5588, "step": 13425 }, { "epoch": 1.95, "grad_norm": 6.058818817138672, "learning_rate": 5.892763368572773e-07, "loss": 0.5063, "step": 13426 }, { "epoch": 1.95, "grad_norm": 5.980530738830566, "learning_rate": 5.891320262970175e-07, "loss": 0.4606, "step": 13427 }, { "epoch": 1.95, "grad_norm": 6.31962251663208, "learning_rate": 5.889877260303877e-07, "loss": 0.5306, "step": 13428 }, { "epoch": 1.95, "grad_norm": 5.688970565795898, "learning_rate": 5.888434360610021e-07, "loss": 0.5286, "step": 13429 }, { "epoch": 1.95, "grad_norm": 6.218330383300781, "learning_rate": 5.886991563924767e-07, "loss": 0.5211, "step": 13430 }, { "epoch": 1.95, "grad_norm": 7.405797481536865, "learning_rate": 5.885548870284254e-07, "loss": 0.5783, "step": 13431 }, { "epoch": 1.95, "grad_norm": 6.972301006317139, "learning_rate": 5.88410627972463e-07, "loss": 0.5207, "step": 13432 }, { "epoch": 1.95, "grad_norm": 6.85235071182251, "learning_rate": 5.882663792282031e-07, "loss": 0.5258, "step": 13433 }, { "epoch": 1.95, "grad_norm": 7.4567670822143555, "learning_rate": 5.881221407992606e-07, "loss": 0.5489, "step": 13434 }, { "epoch": 1.95, "grad_norm": 5.858536243438721, "learning_rate": 5.879779126892481e-07, "loss": 0.4477, "step": 13435 }, { "epoch": 1.95, "grad_norm": 7.492300033569336, "learning_rate": 5.878336949017801e-07, "loss": 0.5806, "step": 13436 }, { "epoch": 1.95, "grad_norm": 6.608850479125977, "learning_rate": 5.876894874404687e-07, "loss": 0.5288, "step": 13437 }, { "epoch": 1.95, "grad_norm": 7.472646236419678, "learning_rate": 5.875452903089276e-07, "loss": 0.5537, "step": 13438 }, { "epoch": 1.95, "grad_norm": 6.322521209716797, "learning_rate": 5.874011035107689e-07, "loss": 0.4638, "step": 13439 }, { "epoch": 1.95, "grad_norm": 6.654798984527588, "learning_rate": 5.872569270496055e-07, "loss": 0.5123, "step": 13440 }, { "epoch": 1.95, "grad_norm": 7.10223388671875, "learning_rate": 5.871127609290485e-07, "loss": 0.488, "step": 13441 }, { "epoch": 1.95, "grad_norm": 6.866116523742676, "learning_rate": 5.869686051527106e-07, "loss": 0.5835, "step": 13442 }, { "epoch": 1.95, "grad_norm": 6.592984676361084, "learning_rate": 5.868244597242036e-07, "loss": 0.5433, "step": 13443 }, { "epoch": 1.95, "grad_norm": 6.971395492553711, "learning_rate": 5.866803246471382e-07, "loss": 0.51, "step": 13444 }, { "epoch": 1.95, "grad_norm": 6.418509006500244, "learning_rate": 5.865361999251255e-07, "loss": 0.5466, "step": 13445 }, { "epoch": 1.95, "grad_norm": 6.372063159942627, "learning_rate": 5.863920855617767e-07, "loss": 0.5022, "step": 13446 }, { "epoch": 1.95, "grad_norm": 7.079773902893066, "learning_rate": 5.862479815607022e-07, "loss": 0.5757, "step": 13447 }, { "epoch": 1.95, "grad_norm": 6.653221130371094, "learning_rate": 5.86103887925512e-07, "loss": 0.5534, "step": 13448 }, { "epoch": 1.95, "grad_norm": 6.599329471588135, "learning_rate": 5.859598046598167e-07, "loss": 0.4828, "step": 13449 }, { "epoch": 1.95, "grad_norm": 6.511810302734375, "learning_rate": 5.858157317672255e-07, "loss": 0.5464, "step": 13450 }, { "epoch": 1.95, "grad_norm": 6.51176118850708, "learning_rate": 5.856716692513483e-07, "loss": 0.5344, "step": 13451 }, { "epoch": 1.95, "grad_norm": 7.469809532165527, "learning_rate": 5.855276171157942e-07, "loss": 0.5317, "step": 13452 }, { "epoch": 1.95, "grad_norm": 7.061544895172119, "learning_rate": 5.853835753641724e-07, "loss": 0.5979, "step": 13453 }, { "epoch": 1.95, "grad_norm": 6.761013984680176, "learning_rate": 5.852395440000911e-07, "loss": 0.5127, "step": 13454 }, { "epoch": 1.95, "grad_norm": 6.726205348968506, "learning_rate": 5.850955230271592e-07, "loss": 0.5646, "step": 13455 }, { "epoch": 1.95, "grad_norm": 5.9907708168029785, "learning_rate": 5.849515124489847e-07, "loss": 0.442, "step": 13456 }, { "epoch": 1.95, "grad_norm": 7.180025577545166, "learning_rate": 5.84807512269176e-07, "loss": 0.5756, "step": 13457 }, { "epoch": 1.95, "grad_norm": 6.449298858642578, "learning_rate": 5.846635224913401e-07, "loss": 0.5316, "step": 13458 }, { "epoch": 1.95, "grad_norm": 6.590726852416992, "learning_rate": 5.845195431190851e-07, "loss": 0.5393, "step": 13459 }, { "epoch": 1.95, "grad_norm": 7.203274726867676, "learning_rate": 5.843755741560178e-07, "loss": 0.5062, "step": 13460 }, { "epoch": 1.95, "grad_norm": 6.6451945304870605, "learning_rate": 5.842316156057452e-07, "loss": 0.4868, "step": 13461 }, { "epoch": 1.95, "grad_norm": 6.106667518615723, "learning_rate": 5.840876674718737e-07, "loss": 0.564, "step": 13462 }, { "epoch": 1.95, "grad_norm": 6.607138156890869, "learning_rate": 5.839437297580102e-07, "loss": 0.5591, "step": 13463 }, { "epoch": 1.95, "grad_norm": 6.638584136962891, "learning_rate": 5.837998024677601e-07, "loss": 0.4926, "step": 13464 }, { "epoch": 1.95, "grad_norm": 6.90091609954834, "learning_rate": 5.836558856047305e-07, "loss": 0.5786, "step": 13465 }, { "epoch": 1.95, "grad_norm": 6.269802093505859, "learning_rate": 5.835119791725252e-07, "loss": 0.4859, "step": 13466 }, { "epoch": 1.95, "grad_norm": 6.667659759521484, "learning_rate": 5.833680831747513e-07, "loss": 0.5087, "step": 13467 }, { "epoch": 1.95, "grad_norm": 7.135426044464111, "learning_rate": 5.832241976150124e-07, "loss": 0.5136, "step": 13468 }, { "epoch": 1.95, "grad_norm": 6.532591342926025, "learning_rate": 5.830803224969144e-07, "loss": 0.5442, "step": 13469 }, { "epoch": 1.95, "grad_norm": 7.294456481933594, "learning_rate": 5.82936457824062e-07, "loss": 0.5078, "step": 13470 }, { "epoch": 1.95, "grad_norm": 6.98326301574707, "learning_rate": 5.827926036000581e-07, "loss": 0.5251, "step": 13471 }, { "epoch": 1.95, "grad_norm": 7.1711859703063965, "learning_rate": 5.826487598285083e-07, "loss": 0.6189, "step": 13472 }, { "epoch": 1.95, "grad_norm": 6.013576984405518, "learning_rate": 5.825049265130151e-07, "loss": 0.4711, "step": 13473 }, { "epoch": 1.96, "grad_norm": 6.554652690887451, "learning_rate": 5.823611036571834e-07, "loss": 0.5924, "step": 13474 }, { "epoch": 1.96, "grad_norm": 6.836673736572266, "learning_rate": 5.822172912646148e-07, "loss": 0.5226, "step": 13475 }, { "epoch": 1.96, "grad_norm": 6.138815879821777, "learning_rate": 5.820734893389141e-07, "loss": 0.5535, "step": 13476 }, { "epoch": 1.96, "grad_norm": 6.728602886199951, "learning_rate": 5.819296978836825e-07, "loss": 0.462, "step": 13477 }, { "epoch": 1.96, "grad_norm": 6.0380167961120605, "learning_rate": 5.81785916902523e-07, "loss": 0.5379, "step": 13478 }, { "epoch": 1.96, "grad_norm": 6.942659854888916, "learning_rate": 5.816421463990378e-07, "loss": 0.5788, "step": 13479 }, { "epoch": 1.96, "grad_norm": 5.6725358963012695, "learning_rate": 5.81498386376829e-07, "loss": 0.4468, "step": 13480 }, { "epoch": 1.96, "grad_norm": 6.954814434051514, "learning_rate": 5.813546368394981e-07, "loss": 0.4939, "step": 13481 }, { "epoch": 1.96, "grad_norm": 6.763372898101807, "learning_rate": 5.812108977906465e-07, "loss": 0.5178, "step": 13482 }, { "epoch": 1.96, "grad_norm": 7.6282734870910645, "learning_rate": 5.810671692338754e-07, "loss": 0.6134, "step": 13483 }, { "epoch": 1.96, "grad_norm": 6.719264507293701, "learning_rate": 5.809234511727856e-07, "loss": 0.614, "step": 13484 }, { "epoch": 1.96, "grad_norm": 6.914753437042236, "learning_rate": 5.80779743610978e-07, "loss": 0.5878, "step": 13485 }, { "epoch": 1.96, "grad_norm": 6.5541462898254395, "learning_rate": 5.80636046552053e-07, "loss": 0.4859, "step": 13486 }, { "epoch": 1.96, "grad_norm": 6.527444362640381, "learning_rate": 5.804923599996097e-07, "loss": 0.4881, "step": 13487 }, { "epoch": 1.96, "grad_norm": 6.679550647735596, "learning_rate": 5.803486839572496e-07, "loss": 0.5419, "step": 13488 }, { "epoch": 1.96, "grad_norm": 6.030409812927246, "learning_rate": 5.802050184285705e-07, "loss": 0.5345, "step": 13489 }, { "epoch": 1.96, "grad_norm": 7.0637383460998535, "learning_rate": 5.800613634171734e-07, "loss": 0.5428, "step": 13490 }, { "epoch": 1.96, "grad_norm": 6.465662002563477, "learning_rate": 5.79917718926656e-07, "loss": 0.5055, "step": 13491 }, { "epoch": 1.96, "grad_norm": 6.377130508422852, "learning_rate": 5.797740849606177e-07, "loss": 0.4789, "step": 13492 }, { "epoch": 1.96, "grad_norm": 5.929748058319092, "learning_rate": 5.796304615226568e-07, "loss": 0.4828, "step": 13493 }, { "epoch": 1.96, "grad_norm": 6.544672012329102, "learning_rate": 5.794868486163716e-07, "loss": 0.478, "step": 13494 }, { "epoch": 1.96, "grad_norm": 6.956284999847412, "learning_rate": 5.793432462453602e-07, "loss": 0.4533, "step": 13495 }, { "epoch": 1.96, "grad_norm": 7.004941940307617, "learning_rate": 5.791996544132205e-07, "loss": 0.5251, "step": 13496 }, { "epoch": 1.96, "grad_norm": 6.318947792053223, "learning_rate": 5.790560731235494e-07, "loss": 0.5168, "step": 13497 }, { "epoch": 1.96, "grad_norm": 6.4702277183532715, "learning_rate": 5.789125023799446e-07, "loss": 0.5569, "step": 13498 }, { "epoch": 1.96, "grad_norm": 5.872424125671387, "learning_rate": 5.787689421860029e-07, "loss": 0.4315, "step": 13499 }, { "epoch": 1.96, "grad_norm": 6.333953857421875, "learning_rate": 5.786253925453208e-07, "loss": 0.534, "step": 13500 }, { "epoch": 1.96, "grad_norm": 6.950352191925049, "learning_rate": 5.78481853461495e-07, "loss": 0.4998, "step": 13501 }, { "epoch": 1.96, "grad_norm": 6.914698123931885, "learning_rate": 5.783383249381213e-07, "loss": 0.607, "step": 13502 }, { "epoch": 1.96, "grad_norm": 6.729386806488037, "learning_rate": 5.781948069787958e-07, "loss": 0.4496, "step": 13503 }, { "epoch": 1.96, "grad_norm": 6.200037956237793, "learning_rate": 5.780512995871141e-07, "loss": 0.5332, "step": 13504 }, { "epoch": 1.96, "grad_norm": 6.444528102874756, "learning_rate": 5.779078027666717e-07, "loss": 0.4959, "step": 13505 }, { "epoch": 1.96, "grad_norm": 7.8058085441589355, "learning_rate": 5.777643165210628e-07, "loss": 0.6208, "step": 13506 }, { "epoch": 1.96, "grad_norm": 6.255331516265869, "learning_rate": 5.776208408538838e-07, "loss": 0.4583, "step": 13507 }, { "epoch": 1.96, "grad_norm": 7.176244258880615, "learning_rate": 5.774773757687275e-07, "loss": 0.5629, "step": 13508 }, { "epoch": 1.96, "grad_norm": 7.3676981925964355, "learning_rate": 5.773339212691897e-07, "loss": 0.4809, "step": 13509 }, { "epoch": 1.96, "grad_norm": 7.171986103057861, "learning_rate": 5.771904773588629e-07, "loss": 0.5811, "step": 13510 }, { "epoch": 1.96, "grad_norm": 6.075984477996826, "learning_rate": 5.770470440413426e-07, "loss": 0.4768, "step": 13511 }, { "epoch": 1.96, "grad_norm": 5.672754764556885, "learning_rate": 5.769036213202211e-07, "loss": 0.4204, "step": 13512 }, { "epoch": 1.96, "grad_norm": 6.967615604400635, "learning_rate": 5.767602091990917e-07, "loss": 0.5496, "step": 13513 }, { "epoch": 1.96, "grad_norm": 6.5838751792907715, "learning_rate": 5.766168076815478e-07, "loss": 0.529, "step": 13514 }, { "epoch": 1.96, "grad_norm": 7.218451976776123, "learning_rate": 5.764734167711817e-07, "loss": 0.6053, "step": 13515 }, { "epoch": 1.96, "grad_norm": 6.496098518371582, "learning_rate": 5.763300364715859e-07, "loss": 0.5572, "step": 13516 }, { "epoch": 1.96, "grad_norm": 6.914050102233887, "learning_rate": 5.761866667863529e-07, "loss": 0.5958, "step": 13517 }, { "epoch": 1.96, "grad_norm": 6.598820686340332, "learning_rate": 5.760433077190741e-07, "loss": 0.4661, "step": 13518 }, { "epoch": 1.96, "grad_norm": 6.886806488037109, "learning_rate": 5.758999592733415e-07, "loss": 0.5599, "step": 13519 }, { "epoch": 1.96, "grad_norm": 7.096063613891602, "learning_rate": 5.757566214527462e-07, "loss": 0.5231, "step": 13520 }, { "epoch": 1.96, "grad_norm": 6.270813941955566, "learning_rate": 5.756132942608796e-07, "loss": 0.487, "step": 13521 }, { "epoch": 1.96, "grad_norm": 6.253013610839844, "learning_rate": 5.754699777013323e-07, "loss": 0.545, "step": 13522 }, { "epoch": 1.96, "grad_norm": 6.363788604736328, "learning_rate": 5.753266717776948e-07, "loss": 0.4931, "step": 13523 }, { "epoch": 1.96, "grad_norm": 6.82499885559082, "learning_rate": 5.751833764935576e-07, "loss": 0.4991, "step": 13524 }, { "epoch": 1.96, "grad_norm": 6.827650547027588, "learning_rate": 5.750400918525107e-07, "loss": 0.5606, "step": 13525 }, { "epoch": 1.96, "grad_norm": 5.718368053436279, "learning_rate": 5.748968178581441e-07, "loss": 0.4862, "step": 13526 }, { "epoch": 1.96, "grad_norm": 7.239282608032227, "learning_rate": 5.747535545140463e-07, "loss": 0.5405, "step": 13527 }, { "epoch": 1.96, "grad_norm": 5.987356185913086, "learning_rate": 5.74610301823808e-07, "loss": 0.4714, "step": 13528 }, { "epoch": 1.96, "grad_norm": 6.549969673156738, "learning_rate": 5.744670597910167e-07, "loss": 0.606, "step": 13529 }, { "epoch": 1.96, "grad_norm": 5.828492641448975, "learning_rate": 5.743238284192624e-07, "loss": 0.5217, "step": 13530 }, { "epoch": 1.96, "grad_norm": 7.18264627456665, "learning_rate": 5.741806077121327e-07, "loss": 0.5188, "step": 13531 }, { "epoch": 1.96, "grad_norm": 6.061948299407959, "learning_rate": 5.74037397673216e-07, "loss": 0.5039, "step": 13532 }, { "epoch": 1.96, "grad_norm": 6.03443717956543, "learning_rate": 5.738941983061001e-07, "loss": 0.5537, "step": 13533 }, { "epoch": 1.96, "grad_norm": 6.111670017242432, "learning_rate": 5.737510096143728e-07, "loss": 0.6117, "step": 13534 }, { "epoch": 1.96, "grad_norm": 6.569931983947754, "learning_rate": 5.736078316016213e-07, "loss": 0.5381, "step": 13535 }, { "epoch": 1.96, "grad_norm": 6.5507025718688965, "learning_rate": 5.734646642714327e-07, "loss": 0.4804, "step": 13536 }, { "epoch": 1.96, "grad_norm": 6.356552600860596, "learning_rate": 5.73321507627394e-07, "loss": 0.4911, "step": 13537 }, { "epoch": 1.96, "grad_norm": 6.8495354652404785, "learning_rate": 5.731783616730915e-07, "loss": 0.5852, "step": 13538 }, { "epoch": 1.96, "grad_norm": 6.448141098022461, "learning_rate": 5.730352264121118e-07, "loss": 0.534, "step": 13539 }, { "epoch": 1.96, "grad_norm": 6.978740215301514, "learning_rate": 5.72892101848041e-07, "loss": 0.557, "step": 13540 }, { "epoch": 1.96, "grad_norm": 5.993851661682129, "learning_rate": 5.72748987984464e-07, "loss": 0.4921, "step": 13541 }, { "epoch": 1.96, "grad_norm": 6.625569820404053, "learning_rate": 5.726058848249675e-07, "loss": 0.5228, "step": 13542 }, { "epoch": 1.97, "grad_norm": 6.066661357879639, "learning_rate": 5.724627923731355e-07, "loss": 0.6125, "step": 13543 }, { "epoch": 1.97, "grad_norm": 7.03924036026001, "learning_rate": 5.723197106325544e-07, "loss": 0.4909, "step": 13544 }, { "epoch": 1.97, "grad_norm": 6.824857711791992, "learning_rate": 5.721766396068071e-07, "loss": 0.5412, "step": 13545 }, { "epoch": 1.97, "grad_norm": 7.729550361633301, "learning_rate": 5.720335792994798e-07, "loss": 0.5651, "step": 13546 }, { "epoch": 1.97, "grad_norm": 6.395285606384277, "learning_rate": 5.718905297141552e-07, "loss": 0.5721, "step": 13547 }, { "epoch": 1.97, "grad_norm": 6.874458312988281, "learning_rate": 5.717474908544176e-07, "loss": 0.5003, "step": 13548 }, { "epoch": 1.97, "grad_norm": 6.477562427520752, "learning_rate": 5.716044627238515e-07, "loss": 0.502, "step": 13549 }, { "epoch": 1.97, "grad_norm": 7.071204662322998, "learning_rate": 5.714614453260388e-07, "loss": 0.4999, "step": 13550 }, { "epoch": 1.97, "grad_norm": 7.922715663909912, "learning_rate": 5.71318438664564e-07, "loss": 0.5589, "step": 13551 }, { "epoch": 1.97, "grad_norm": 6.801260948181152, "learning_rate": 5.711754427430089e-07, "loss": 0.5321, "step": 13552 }, { "epoch": 1.97, "grad_norm": 5.957098960876465, "learning_rate": 5.710324575649561e-07, "loss": 0.4946, "step": 13553 }, { "epoch": 1.97, "grad_norm": 6.908222675323486, "learning_rate": 5.708894831339883e-07, "loss": 0.5784, "step": 13554 }, { "epoch": 1.97, "grad_norm": 6.062019348144531, "learning_rate": 5.707465194536871e-07, "loss": 0.456, "step": 13555 }, { "epoch": 1.97, "grad_norm": 7.52485990524292, "learning_rate": 5.706035665276345e-07, "loss": 0.653, "step": 13556 }, { "epoch": 1.97, "grad_norm": 6.8639960289001465, "learning_rate": 5.704606243594119e-07, "loss": 0.5841, "step": 13557 }, { "epoch": 1.97, "grad_norm": 7.040689945220947, "learning_rate": 5.703176929526003e-07, "loss": 0.5035, "step": 13558 }, { "epoch": 1.97, "grad_norm": 6.751730442047119, "learning_rate": 5.701747723107809e-07, "loss": 0.5878, "step": 13559 }, { "epoch": 1.97, "grad_norm": 6.03173828125, "learning_rate": 5.70031862437534e-07, "loss": 0.4135, "step": 13560 }, { "epoch": 1.97, "grad_norm": 6.9028706550598145, "learning_rate": 5.698889633364406e-07, "loss": 0.5516, "step": 13561 }, { "epoch": 1.97, "grad_norm": 7.203668117523193, "learning_rate": 5.697460750110796e-07, "loss": 0.4832, "step": 13562 }, { "epoch": 1.97, "grad_norm": 6.970329761505127, "learning_rate": 5.696031974650324e-07, "loss": 0.5703, "step": 13563 }, { "epoch": 1.97, "grad_norm": 5.998164176940918, "learning_rate": 5.694603307018769e-07, "loss": 0.4771, "step": 13564 }, { "epoch": 1.97, "grad_norm": 6.2083516120910645, "learning_rate": 5.693174747251941e-07, "loss": 0.4914, "step": 13565 }, { "epoch": 1.97, "grad_norm": 7.0868024826049805, "learning_rate": 5.691746295385615e-07, "loss": 0.5353, "step": 13566 }, { "epoch": 1.97, "grad_norm": 6.4306793212890625, "learning_rate": 5.690317951455592e-07, "loss": 0.4837, "step": 13567 }, { "epoch": 1.97, "grad_norm": 6.798173904418945, "learning_rate": 5.688889715497647e-07, "loss": 0.5247, "step": 13568 }, { "epoch": 1.97, "grad_norm": 6.918671607971191, "learning_rate": 5.687461587547565e-07, "loss": 0.557, "step": 13569 }, { "epoch": 1.97, "grad_norm": 6.490324974060059, "learning_rate": 5.686033567641126e-07, "loss": 0.5191, "step": 13570 }, { "epoch": 1.97, "grad_norm": 6.771347522735596, "learning_rate": 5.684605655814106e-07, "loss": 0.6157, "step": 13571 }, { "epoch": 1.97, "grad_norm": 6.901094913482666, "learning_rate": 5.683177852102281e-07, "loss": 0.6074, "step": 13572 }, { "epoch": 1.97, "grad_norm": 6.180509090423584, "learning_rate": 5.681750156541419e-07, "loss": 0.544, "step": 13573 }, { "epoch": 1.97, "grad_norm": 6.1984968185424805, "learning_rate": 5.680322569167291e-07, "loss": 0.5566, "step": 13574 }, { "epoch": 1.97, "grad_norm": 6.924862384796143, "learning_rate": 5.678895090015663e-07, "loss": 0.6156, "step": 13575 }, { "epoch": 1.97, "grad_norm": 6.984633922576904, "learning_rate": 5.677467719122296e-07, "loss": 0.6023, "step": 13576 }, { "epoch": 1.97, "grad_norm": 6.349648475646973, "learning_rate": 5.676040456522953e-07, "loss": 0.4546, "step": 13577 }, { "epoch": 1.97, "grad_norm": 6.078750133514404, "learning_rate": 5.67461330225339e-07, "loss": 0.4945, "step": 13578 }, { "epoch": 1.97, "grad_norm": 7.4841790199279785, "learning_rate": 5.673186256349364e-07, "loss": 0.6315, "step": 13579 }, { "epoch": 1.97, "grad_norm": 6.623937129974365, "learning_rate": 5.671759318846625e-07, "loss": 0.5626, "step": 13580 }, { "epoch": 1.97, "grad_norm": 7.243165969848633, "learning_rate": 5.670332489780925e-07, "loss": 0.6095, "step": 13581 }, { "epoch": 1.97, "grad_norm": 6.972428798675537, "learning_rate": 5.668905769188011e-07, "loss": 0.5386, "step": 13582 }, { "epoch": 1.97, "grad_norm": 5.885970115661621, "learning_rate": 5.667479157103619e-07, "loss": 0.5196, "step": 13583 }, { "epoch": 1.97, "grad_norm": 6.183384418487549, "learning_rate": 5.666052653563505e-07, "loss": 0.5197, "step": 13584 }, { "epoch": 1.97, "grad_norm": 6.691746234893799, "learning_rate": 5.664626258603393e-07, "loss": 0.565, "step": 13585 }, { "epoch": 1.97, "grad_norm": 6.9461236000061035, "learning_rate": 5.663199972259032e-07, "loss": 0.5141, "step": 13586 }, { "epoch": 1.97, "grad_norm": 6.451807498931885, "learning_rate": 5.661773794566146e-07, "loss": 0.5669, "step": 13587 }, { "epoch": 1.97, "grad_norm": 7.181048393249512, "learning_rate": 5.660347725560468e-07, "loss": 0.5461, "step": 13588 }, { "epoch": 1.97, "grad_norm": 6.195990562438965, "learning_rate": 5.658921765277727e-07, "loss": 0.5328, "step": 13589 }, { "epoch": 1.97, "grad_norm": 6.3712663650512695, "learning_rate": 5.657495913753649e-07, "loss": 0.5473, "step": 13590 }, { "epoch": 1.97, "grad_norm": 6.745782375335693, "learning_rate": 5.656070171023954e-07, "loss": 0.5193, "step": 13591 }, { "epoch": 1.97, "grad_norm": 6.421778202056885, "learning_rate": 5.654644537124363e-07, "loss": 0.5007, "step": 13592 }, { "epoch": 1.97, "grad_norm": 6.499269485473633, "learning_rate": 5.653219012090591e-07, "loss": 0.5085, "step": 13593 }, { "epoch": 1.97, "grad_norm": 7.289037227630615, "learning_rate": 5.651793595958356e-07, "loss": 0.5246, "step": 13594 }, { "epoch": 1.97, "grad_norm": 6.81453800201416, "learning_rate": 5.650368288763367e-07, "loss": 0.5013, "step": 13595 }, { "epoch": 1.97, "grad_norm": 6.254787445068359, "learning_rate": 5.648943090541336e-07, "loss": 0.444, "step": 13596 }, { "epoch": 1.97, "grad_norm": 6.266003608703613, "learning_rate": 5.647518001327958e-07, "loss": 0.4816, "step": 13597 }, { "epoch": 1.97, "grad_norm": 7.007503986358643, "learning_rate": 5.646093021158953e-07, "loss": 0.5027, "step": 13598 }, { "epoch": 1.97, "grad_norm": 6.640143394470215, "learning_rate": 5.644668150070007e-07, "loss": 0.4977, "step": 13599 }, { "epoch": 1.97, "grad_norm": 6.6700592041015625, "learning_rate": 5.643243388096825e-07, "loss": 0.5577, "step": 13600 }, { "epoch": 1.97, "grad_norm": 6.240353107452393, "learning_rate": 5.641818735275101e-07, "loss": 0.5397, "step": 13601 }, { "epoch": 1.97, "grad_norm": 7.022863388061523, "learning_rate": 5.64039419164053e-07, "loss": 0.5882, "step": 13602 }, { "epoch": 1.97, "grad_norm": 6.700203895568848, "learning_rate": 5.6389697572288e-07, "loss": 0.5374, "step": 13603 }, { "epoch": 1.97, "grad_norm": 6.038552284240723, "learning_rate": 5.63754543207559e-07, "loss": 0.5083, "step": 13604 }, { "epoch": 1.97, "grad_norm": 6.681051254272461, "learning_rate": 5.636121216216598e-07, "loss": 0.4774, "step": 13605 }, { "epoch": 1.97, "grad_norm": 7.123988628387451, "learning_rate": 5.634697109687492e-07, "loss": 0.5304, "step": 13606 }, { "epoch": 1.97, "grad_norm": 6.928262233734131, "learning_rate": 5.633273112523964e-07, "loss": 0.5382, "step": 13607 }, { "epoch": 1.97, "grad_norm": 7.053359508514404, "learning_rate": 5.63184922476168e-07, "loss": 0.5419, "step": 13608 }, { "epoch": 1.97, "grad_norm": 7.462217330932617, "learning_rate": 5.630425446436315e-07, "loss": 0.6047, "step": 13609 }, { "epoch": 1.97, "grad_norm": 6.5388360023498535, "learning_rate": 5.629001777583542e-07, "loss": 0.5653, "step": 13610 }, { "epoch": 1.97, "grad_norm": 6.8043694496154785, "learning_rate": 5.627578218239026e-07, "loss": 0.4952, "step": 13611 }, { "epoch": 1.98, "grad_norm": 6.609791278839111, "learning_rate": 5.626154768438434e-07, "loss": 0.5029, "step": 13612 }, { "epoch": 1.98, "grad_norm": 6.649084568023682, "learning_rate": 5.624731428217427e-07, "loss": 0.5146, "step": 13613 }, { "epoch": 1.98, "grad_norm": 7.301408767700195, "learning_rate": 5.623308197611665e-07, "loss": 0.5342, "step": 13614 }, { "epoch": 1.98, "grad_norm": 6.783583641052246, "learning_rate": 5.621885076656805e-07, "loss": 0.5317, "step": 13615 }, { "epoch": 1.98, "grad_norm": 6.478969097137451, "learning_rate": 5.6204620653885e-07, "loss": 0.4852, "step": 13616 }, { "epoch": 1.98, "grad_norm": 6.731100559234619, "learning_rate": 5.619039163842407e-07, "loss": 0.5938, "step": 13617 }, { "epoch": 1.98, "grad_norm": 6.413249969482422, "learning_rate": 5.617616372054161e-07, "loss": 0.5653, "step": 13618 }, { "epoch": 1.98, "grad_norm": 6.155475616455078, "learning_rate": 5.616193690059426e-07, "loss": 0.4958, "step": 13619 }, { "epoch": 1.98, "grad_norm": 6.408347129821777, "learning_rate": 5.614771117893824e-07, "loss": 0.4882, "step": 13620 }, { "epoch": 1.98, "grad_norm": 6.519688129425049, "learning_rate": 5.613348655593017e-07, "loss": 0.4848, "step": 13621 }, { "epoch": 1.98, "grad_norm": 6.508345127105713, "learning_rate": 5.611926303192623e-07, "loss": 0.5307, "step": 13622 }, { "epoch": 1.98, "grad_norm": 7.055621147155762, "learning_rate": 5.610504060728293e-07, "loss": 0.5258, "step": 13623 }, { "epoch": 1.98, "grad_norm": 7.532862186431885, "learning_rate": 5.609081928235647e-07, "loss": 0.4881, "step": 13624 }, { "epoch": 1.98, "grad_norm": 6.599603176116943, "learning_rate": 5.607659905750316e-07, "loss": 0.4414, "step": 13625 }, { "epoch": 1.98, "grad_norm": 7.402770042419434, "learning_rate": 5.606237993307936e-07, "loss": 0.5358, "step": 13626 }, { "epoch": 1.98, "grad_norm": 7.265474796295166, "learning_rate": 5.604816190944117e-07, "loss": 0.5204, "step": 13627 }, { "epoch": 1.98, "grad_norm": 6.791872501373291, "learning_rate": 5.603394498694496e-07, "loss": 0.5573, "step": 13628 }, { "epoch": 1.98, "grad_norm": 6.044124603271484, "learning_rate": 5.601972916594677e-07, "loss": 0.4717, "step": 13629 }, { "epoch": 1.98, "grad_norm": 6.555248737335205, "learning_rate": 5.60055144468028e-07, "loss": 0.5539, "step": 13630 }, { "epoch": 1.98, "grad_norm": 6.082862377166748, "learning_rate": 5.599130082986918e-07, "loss": 0.5212, "step": 13631 }, { "epoch": 1.98, "grad_norm": 6.270779132843018, "learning_rate": 5.597708831550203e-07, "loss": 0.5146, "step": 13632 }, { "epoch": 1.98, "grad_norm": 5.998843193054199, "learning_rate": 5.596287690405739e-07, "loss": 0.4918, "step": 13633 }, { "epoch": 1.98, "grad_norm": 6.1449666023254395, "learning_rate": 5.594866659589131e-07, "loss": 0.5304, "step": 13634 }, { "epoch": 1.98, "grad_norm": 7.341848373413086, "learning_rate": 5.593445739135982e-07, "loss": 0.5373, "step": 13635 }, { "epoch": 1.98, "grad_norm": 6.638881683349609, "learning_rate": 5.592024929081891e-07, "loss": 0.5375, "step": 13636 }, { "epoch": 1.98, "grad_norm": 5.902073383331299, "learning_rate": 5.590604229462453e-07, "loss": 0.4892, "step": 13637 }, { "epoch": 1.98, "grad_norm": 7.1602325439453125, "learning_rate": 5.589183640313263e-07, "loss": 0.5877, "step": 13638 }, { "epoch": 1.98, "grad_norm": 6.452001094818115, "learning_rate": 5.587763161669904e-07, "loss": 0.4844, "step": 13639 }, { "epoch": 1.98, "grad_norm": 6.880300045013428, "learning_rate": 5.586342793567978e-07, "loss": 0.601, "step": 13640 }, { "epoch": 1.98, "grad_norm": 6.178164958953857, "learning_rate": 5.584922536043053e-07, "loss": 0.56, "step": 13641 }, { "epoch": 1.98, "grad_norm": 6.278682231903076, "learning_rate": 5.583502389130729e-07, "loss": 0.5567, "step": 13642 }, { "epoch": 1.98, "grad_norm": 6.612026214599609, "learning_rate": 5.582082352866573e-07, "loss": 0.5182, "step": 13643 }, { "epoch": 1.98, "grad_norm": 6.457296371459961, "learning_rate": 5.580662427286163e-07, "loss": 0.4995, "step": 13644 }, { "epoch": 1.98, "grad_norm": 6.820563316345215, "learning_rate": 5.579242612425077e-07, "loss": 0.538, "step": 13645 }, { "epoch": 1.98, "grad_norm": 6.890692710876465, "learning_rate": 5.577822908318884e-07, "loss": 0.5226, "step": 13646 }, { "epoch": 1.98, "grad_norm": 7.076621055603027, "learning_rate": 5.576403315003153e-07, "loss": 0.6361, "step": 13647 }, { "epoch": 1.98, "grad_norm": 6.913827896118164, "learning_rate": 5.574983832513449e-07, "loss": 0.5076, "step": 13648 }, { "epoch": 1.98, "grad_norm": 6.215307235717773, "learning_rate": 5.573564460885336e-07, "loss": 0.5589, "step": 13649 }, { "epoch": 1.98, "grad_norm": 6.9787139892578125, "learning_rate": 5.572145200154373e-07, "loss": 0.5803, "step": 13650 }, { "epoch": 1.98, "grad_norm": 7.116899490356445, "learning_rate": 5.570726050356116e-07, "loss": 0.5605, "step": 13651 }, { "epoch": 1.98, "grad_norm": 6.298495769500732, "learning_rate": 5.569307011526124e-07, "loss": 0.5307, "step": 13652 }, { "epoch": 1.98, "grad_norm": 6.705995559692383, "learning_rate": 5.567888083699943e-07, "loss": 0.5591, "step": 13653 }, { "epoch": 1.98, "grad_norm": 6.513428688049316, "learning_rate": 5.566469266913126e-07, "loss": 0.4773, "step": 13654 }, { "epoch": 1.98, "grad_norm": 6.806107997894287, "learning_rate": 5.565050561201217e-07, "loss": 0.5498, "step": 13655 }, { "epoch": 1.98, "grad_norm": 6.6579694747924805, "learning_rate": 5.56363196659976e-07, "loss": 0.5112, "step": 13656 }, { "epoch": 1.98, "grad_norm": 7.287471771240234, "learning_rate": 5.562213483144295e-07, "loss": 0.5867, "step": 13657 }, { "epoch": 1.98, "grad_norm": 6.293632984161377, "learning_rate": 5.560795110870362e-07, "loss": 0.5175, "step": 13658 }, { "epoch": 1.98, "grad_norm": 6.4394354820251465, "learning_rate": 5.5593768498135e-07, "loss": 0.5504, "step": 13659 }, { "epoch": 1.98, "grad_norm": 6.340704917907715, "learning_rate": 5.557958700009224e-07, "loss": 0.5609, "step": 13660 }, { "epoch": 1.98, "grad_norm": 8.04863166809082, "learning_rate": 5.556540661493086e-07, "loss": 0.5272, "step": 13661 }, { "epoch": 1.98, "grad_norm": 7.052822113037109, "learning_rate": 5.555122734300592e-07, "loss": 0.5354, "step": 13662 }, { "epoch": 1.98, "grad_norm": 7.455256938934326, "learning_rate": 5.553704918467286e-07, "loss": 0.5653, "step": 13663 }, { "epoch": 1.98, "grad_norm": 6.52653694152832, "learning_rate": 5.552287214028674e-07, "loss": 0.4911, "step": 13664 }, { "epoch": 1.98, "grad_norm": 5.648570537567139, "learning_rate": 5.550869621020278e-07, "loss": 0.4217, "step": 13665 }, { "epoch": 1.98, "grad_norm": 6.553218364715576, "learning_rate": 5.549452139477616e-07, "loss": 0.5411, "step": 13666 }, { "epoch": 1.98, "grad_norm": 6.835035800933838, "learning_rate": 5.548034769436199e-07, "loss": 0.5997, "step": 13667 }, { "epoch": 1.98, "grad_norm": 5.778347492218018, "learning_rate": 5.546617510931535e-07, "loss": 0.463, "step": 13668 }, { "epoch": 1.98, "grad_norm": 6.859004020690918, "learning_rate": 5.545200363999137e-07, "loss": 0.5274, "step": 13669 }, { "epoch": 1.98, "grad_norm": 6.390631675720215, "learning_rate": 5.543783328674503e-07, "loss": 0.5551, "step": 13670 }, { "epoch": 1.98, "grad_norm": 6.228623867034912, "learning_rate": 5.542366404993137e-07, "loss": 0.4848, "step": 13671 }, { "epoch": 1.98, "grad_norm": 7.102837562561035, "learning_rate": 5.540949592990538e-07, "loss": 0.521, "step": 13672 }, { "epoch": 1.98, "grad_norm": 6.730447769165039, "learning_rate": 5.539532892702206e-07, "loss": 0.6312, "step": 13673 }, { "epoch": 1.98, "grad_norm": 6.680048942565918, "learning_rate": 5.538116304163623e-07, "loss": 0.5442, "step": 13674 }, { "epoch": 1.98, "grad_norm": 6.760647296905518, "learning_rate": 5.536699827410293e-07, "loss": 0.5025, "step": 13675 }, { "epoch": 1.98, "grad_norm": 6.7899556159973145, "learning_rate": 5.535283462477689e-07, "loss": 0.5071, "step": 13676 }, { "epoch": 1.98, "grad_norm": 6.547217845916748, "learning_rate": 5.533867209401308e-07, "loss": 0.5408, "step": 13677 }, { "epoch": 1.98, "grad_norm": 6.831417560577393, "learning_rate": 5.53245106821663e-07, "loss": 0.5546, "step": 13678 }, { "epoch": 1.98, "grad_norm": 6.859179973602295, "learning_rate": 5.531035038959124e-07, "loss": 0.5339, "step": 13679 }, { "epoch": 1.98, "grad_norm": 6.783839225769043, "learning_rate": 5.529619121664283e-07, "loss": 0.4803, "step": 13680 }, { "epoch": 1.99, "grad_norm": 7.13049840927124, "learning_rate": 5.528203316367563e-07, "loss": 0.6017, "step": 13681 }, { "epoch": 1.99, "grad_norm": 6.503716945648193, "learning_rate": 5.526787623104449e-07, "loss": 0.5598, "step": 13682 }, { "epoch": 1.99, "grad_norm": 7.667624473571777, "learning_rate": 5.525372041910397e-07, "loss": 0.5327, "step": 13683 }, { "epoch": 1.99, "grad_norm": 6.628016471862793, "learning_rate": 5.523956572820885e-07, "loss": 0.5091, "step": 13684 }, { "epoch": 1.99, "grad_norm": 6.539597034454346, "learning_rate": 5.522541215871366e-07, "loss": 0.482, "step": 13685 }, { "epoch": 1.99, "grad_norm": 6.476038932800293, "learning_rate": 5.521125971097299e-07, "loss": 0.4909, "step": 13686 }, { "epoch": 1.99, "grad_norm": 6.1894636154174805, "learning_rate": 5.519710838534145e-07, "loss": 0.5414, "step": 13687 }, { "epoch": 1.99, "grad_norm": 5.9124650955200195, "learning_rate": 5.518295818217356e-07, "loss": 0.4704, "step": 13688 }, { "epoch": 1.99, "grad_norm": 6.309144496917725, "learning_rate": 5.516880910182384e-07, "loss": 0.5265, "step": 13689 }, { "epoch": 1.99, "grad_norm": 6.454810619354248, "learning_rate": 5.515466114464675e-07, "loss": 0.5047, "step": 13690 }, { "epoch": 1.99, "grad_norm": 7.1764936447143555, "learning_rate": 5.514051431099677e-07, "loss": 0.6289, "step": 13691 }, { "epoch": 1.99, "grad_norm": 7.084616184234619, "learning_rate": 5.51263686012283e-07, "loss": 0.5344, "step": 13692 }, { "epoch": 1.99, "grad_norm": 7.705804347991943, "learning_rate": 5.511222401569577e-07, "loss": 0.5455, "step": 13693 }, { "epoch": 1.99, "grad_norm": 6.658587455749512, "learning_rate": 5.509808055475357e-07, "loss": 0.509, "step": 13694 }, { "epoch": 1.99, "grad_norm": 5.91001558303833, "learning_rate": 5.508393821875593e-07, "loss": 0.4581, "step": 13695 }, { "epoch": 1.99, "grad_norm": 6.604617595672607, "learning_rate": 5.50697970080573e-07, "loss": 0.5415, "step": 13696 }, { "epoch": 1.99, "grad_norm": 6.048340797424316, "learning_rate": 5.505565692301182e-07, "loss": 0.5505, "step": 13697 }, { "epoch": 1.99, "grad_norm": 7.017821788787842, "learning_rate": 5.504151796397392e-07, "loss": 0.4795, "step": 13698 }, { "epoch": 1.99, "grad_norm": 6.427071571350098, "learning_rate": 5.50273801312977e-07, "loss": 0.4809, "step": 13699 }, { "epoch": 1.99, "grad_norm": 6.375840187072754, "learning_rate": 5.501324342533739e-07, "loss": 0.4994, "step": 13700 }, { "epoch": 1.99, "grad_norm": 6.590425491333008, "learning_rate": 5.499910784644716e-07, "loss": 0.4808, "step": 13701 }, { "epoch": 1.99, "grad_norm": 7.048759937286377, "learning_rate": 5.498497339498118e-07, "loss": 0.5645, "step": 13702 }, { "epoch": 1.99, "grad_norm": 6.114660739898682, "learning_rate": 5.497084007129355e-07, "loss": 0.5325, "step": 13703 }, { "epoch": 1.99, "grad_norm": 6.94940185546875, "learning_rate": 5.495670787573832e-07, "loss": 0.536, "step": 13704 }, { "epoch": 1.99, "grad_norm": 5.947518348693848, "learning_rate": 5.494257680866966e-07, "loss": 0.5433, "step": 13705 }, { "epoch": 1.99, "grad_norm": 7.223172187805176, "learning_rate": 5.492844687044149e-07, "loss": 0.5462, "step": 13706 }, { "epoch": 1.99, "grad_norm": 6.533842086791992, "learning_rate": 5.491431806140785e-07, "loss": 0.5917, "step": 13707 }, { "epoch": 1.99, "grad_norm": 5.916351795196533, "learning_rate": 5.490019038192271e-07, "loss": 0.5006, "step": 13708 }, { "epoch": 1.99, "grad_norm": 7.243797779083252, "learning_rate": 5.488606383234002e-07, "loss": 0.5623, "step": 13709 }, { "epoch": 1.99, "grad_norm": 6.750857353210449, "learning_rate": 5.48719384130137e-07, "loss": 0.5321, "step": 13710 }, { "epoch": 1.99, "grad_norm": 7.107063293457031, "learning_rate": 5.485781412429764e-07, "loss": 0.4708, "step": 13711 }, { "epoch": 1.99, "grad_norm": 7.018109321594238, "learning_rate": 5.484369096654571e-07, "loss": 0.5827, "step": 13712 }, { "epoch": 1.99, "grad_norm": 7.079000473022461, "learning_rate": 5.482956894011171e-07, "loss": 0.5168, "step": 13713 }, { "epoch": 1.99, "grad_norm": 6.46181583404541, "learning_rate": 5.481544804534948e-07, "loss": 0.5637, "step": 13714 }, { "epoch": 1.99, "grad_norm": 7.252591133117676, "learning_rate": 5.480132828261282e-07, "loss": 0.5385, "step": 13715 }, { "epoch": 1.99, "grad_norm": 7.4692702293396, "learning_rate": 5.478720965225537e-07, "loss": 0.5431, "step": 13716 }, { "epoch": 1.99, "grad_norm": 6.850578308105469, "learning_rate": 5.477309215463098e-07, "loss": 0.5756, "step": 13717 }, { "epoch": 1.99, "grad_norm": 7.051894664764404, "learning_rate": 5.47589757900932e-07, "loss": 0.5788, "step": 13718 }, { "epoch": 1.99, "grad_norm": 6.212480068206787, "learning_rate": 5.474486055899585e-07, "loss": 0.5464, "step": 13719 }, { "epoch": 1.99, "grad_norm": 7.383185863494873, "learning_rate": 5.473074646169247e-07, "loss": 0.5935, "step": 13720 }, { "epoch": 1.99, "grad_norm": 6.400449752807617, "learning_rate": 5.471663349853666e-07, "loss": 0.5113, "step": 13721 }, { "epoch": 1.99, "grad_norm": 6.944577693939209, "learning_rate": 5.470252166988203e-07, "loss": 0.5295, "step": 13722 }, { "epoch": 1.99, "grad_norm": 6.57655668258667, "learning_rate": 5.468841097608213e-07, "loss": 0.4907, "step": 13723 }, { "epoch": 1.99, "grad_norm": 5.9059038162231445, "learning_rate": 5.467430141749045e-07, "loss": 0.4694, "step": 13724 }, { "epoch": 1.99, "grad_norm": 5.850046157836914, "learning_rate": 5.466019299446051e-07, "loss": 0.5284, "step": 13725 }, { "epoch": 1.99, "grad_norm": 7.531169414520264, "learning_rate": 5.464608570734576e-07, "loss": 0.54, "step": 13726 }, { "epoch": 1.99, "grad_norm": 6.614015102386475, "learning_rate": 5.463197955649965e-07, "loss": 0.4818, "step": 13727 }, { "epoch": 1.99, "grad_norm": 7.159332752227783, "learning_rate": 5.461787454227558e-07, "loss": 0.501, "step": 13728 }, { "epoch": 1.99, "grad_norm": 6.394339084625244, "learning_rate": 5.460377066502694e-07, "loss": 0.5315, "step": 13729 }, { "epoch": 1.99, "grad_norm": 6.102975368499756, "learning_rate": 5.4589667925107e-07, "loss": 0.5423, "step": 13730 }, { "epoch": 1.99, "grad_norm": 6.598641395568848, "learning_rate": 5.457556632286919e-07, "loss": 0.4938, "step": 13731 }, { "epoch": 1.99, "grad_norm": 6.268680095672607, "learning_rate": 5.456146585866677e-07, "loss": 0.501, "step": 13732 }, { "epoch": 1.99, "grad_norm": 6.542960166931152, "learning_rate": 5.454736653285297e-07, "loss": 0.5524, "step": 13733 }, { "epoch": 1.99, "grad_norm": 5.473033905029297, "learning_rate": 5.453326834578109e-07, "loss": 0.5092, "step": 13734 }, { "epoch": 1.99, "grad_norm": 6.4749650955200195, "learning_rate": 5.451917129780424e-07, "loss": 0.593, "step": 13735 }, { "epoch": 1.99, "grad_norm": 7.714757442474365, "learning_rate": 5.450507538927571e-07, "loss": 0.5974, "step": 13736 }, { "epoch": 1.99, "grad_norm": 7.3455305099487305, "learning_rate": 5.449098062054852e-07, "loss": 0.5073, "step": 13737 }, { "epoch": 1.99, "grad_norm": 6.475200653076172, "learning_rate": 5.447688699197594e-07, "loss": 0.5226, "step": 13738 }, { "epoch": 1.99, "grad_norm": 6.393797874450684, "learning_rate": 5.446279450391094e-07, "loss": 0.553, "step": 13739 }, { "epoch": 1.99, "grad_norm": 6.746332168579102, "learning_rate": 5.444870315670669e-07, "loss": 0.5665, "step": 13740 }, { "epoch": 1.99, "grad_norm": 5.857768535614014, "learning_rate": 5.443461295071612e-07, "loss": 0.5017, "step": 13741 }, { "epoch": 1.99, "grad_norm": 5.695366382598877, "learning_rate": 5.44205238862923e-07, "loss": 0.4744, "step": 13742 }, { "epoch": 1.99, "grad_norm": 6.140982627868652, "learning_rate": 5.44064359637882e-07, "loss": 0.5595, "step": 13743 }, { "epoch": 1.99, "grad_norm": 6.022497177124023, "learning_rate": 5.439234918355675e-07, "loss": 0.5033, "step": 13744 }, { "epoch": 1.99, "grad_norm": 6.828399658203125, "learning_rate": 5.43782635459509e-07, "loss": 0.5318, "step": 13745 }, { "epoch": 1.99, "grad_norm": 5.713476181030273, "learning_rate": 5.436417905132351e-07, "loss": 0.474, "step": 13746 }, { "epoch": 1.99, "grad_norm": 5.95463752746582, "learning_rate": 5.435009570002749e-07, "loss": 0.5451, "step": 13747 }, { "epoch": 1.99, "grad_norm": 7.964733600616455, "learning_rate": 5.433601349241563e-07, "loss": 0.6408, "step": 13748 }, { "epoch": 1.99, "grad_norm": 6.396942138671875, "learning_rate": 5.432193242884077e-07, "loss": 0.5131, "step": 13749 }, { "epoch": 2.0, "grad_norm": 6.103483200073242, "learning_rate": 5.430785250965571e-07, "loss": 0.4958, "step": 13750 }, { "epoch": 2.0, "grad_norm": 6.80909538269043, "learning_rate": 5.429377373521307e-07, "loss": 0.5481, "step": 13751 }, { "epoch": 2.0, "grad_norm": 6.875828742980957, "learning_rate": 5.427969610586576e-07, "loss": 0.4783, "step": 13752 }, { "epoch": 2.0, "grad_norm": 6.291336536407471, "learning_rate": 5.426561962196629e-07, "loss": 0.5069, "step": 13753 }, { "epoch": 2.0, "grad_norm": 6.531338214874268, "learning_rate": 5.425154428386748e-07, "loss": 0.4235, "step": 13754 }, { "epoch": 2.0, "grad_norm": 6.12122106552124, "learning_rate": 5.423747009192187e-07, "loss": 0.5362, "step": 13755 }, { "epoch": 2.0, "grad_norm": 6.458156108856201, "learning_rate": 5.422339704648204e-07, "loss": 0.5709, "step": 13756 }, { "epoch": 2.0, "grad_norm": 6.821264266967773, "learning_rate": 5.420932514790071e-07, "loss": 0.5259, "step": 13757 }, { "epoch": 2.0, "grad_norm": 5.943844795227051, "learning_rate": 5.419525439653024e-07, "loss": 0.4312, "step": 13758 }, { "epoch": 2.0, "grad_norm": 6.071971893310547, "learning_rate": 5.418118479272333e-07, "loss": 0.5115, "step": 13759 }, { "epoch": 2.0, "grad_norm": 7.494703769683838, "learning_rate": 5.416711633683236e-07, "loss": 0.5239, "step": 13760 }, { "epoch": 2.0, "grad_norm": 6.815484523773193, "learning_rate": 5.41530490292098e-07, "loss": 0.5177, "step": 13761 }, { "epoch": 2.0, "grad_norm": 6.026936054229736, "learning_rate": 5.413898287020812e-07, "loss": 0.5158, "step": 13762 }, { "epoch": 2.0, "grad_norm": 6.461928367614746, "learning_rate": 5.412491786017972e-07, "loss": 0.5311, "step": 13763 }, { "epoch": 2.0, "grad_norm": 7.861841201782227, "learning_rate": 5.411085399947694e-07, "loss": 0.5973, "step": 13764 }, { "epoch": 2.0, "grad_norm": 7.570191860198975, "learning_rate": 5.409679128845216e-07, "loss": 0.5636, "step": 13765 }, { "epoch": 2.0, "grad_norm": 6.460878372192383, "learning_rate": 5.408272972745768e-07, "loss": 0.4912, "step": 13766 }, { "epoch": 2.0, "grad_norm": 6.4273681640625, "learning_rate": 5.406866931684582e-07, "loss": 0.5043, "step": 13767 }, { "epoch": 2.0, "grad_norm": 6.259581565856934, "learning_rate": 5.40546100569688e-07, "loss": 0.5111, "step": 13768 }, { "epoch": 2.0, "grad_norm": 6.558874607086182, "learning_rate": 5.404055194817891e-07, "loss": 0.545, "step": 13769 }, { "epoch": 2.0, "grad_norm": 5.736105442047119, "learning_rate": 5.402649499082825e-07, "loss": 0.463, "step": 13770 }, { "epoch": 2.0, "grad_norm": 6.537276744842529, "learning_rate": 5.401243918526913e-07, "loss": 0.4651, "step": 13771 }, { "epoch": 2.0, "grad_norm": 7.029304027557373, "learning_rate": 5.399838453185355e-07, "loss": 0.5663, "step": 13772 }, { "epoch": 2.0, "grad_norm": 6.377528190612793, "learning_rate": 5.398433103093377e-07, "loss": 0.5134, "step": 13773 }, { "epoch": 2.0, "grad_norm": 7.303764820098877, "learning_rate": 5.397027868286173e-07, "loss": 0.4968, "step": 13774 }, { "epoch": 2.0, "grad_norm": 6.876492500305176, "learning_rate": 5.395622748798966e-07, "loss": 0.5683, "step": 13775 }, { "epoch": 2.0, "grad_norm": 5.729451656341553, "learning_rate": 5.394217744666945e-07, "loss": 0.4594, "step": 13776 }, { "epoch": 2.0, "grad_norm": 6.289154529571533, "learning_rate": 5.392812855925315e-07, "loss": 0.5698, "step": 13777 }, { "epoch": 2.0, "grad_norm": 6.451258659362793, "learning_rate": 5.391408082609272e-07, "loss": 0.5057, "step": 13778 }, { "epoch": 2.0, "grad_norm": 6.872193336486816, "learning_rate": 5.390003424754013e-07, "loss": 0.5925, "step": 13779 }, { "epoch": 2.0, "grad_norm": 8.293778419494629, "learning_rate": 5.388598882394726e-07, "loss": 0.5922, "step": 13780 }, { "epoch": 2.0, "grad_norm": 7.948909282684326, "learning_rate": 5.387194455566601e-07, "loss": 0.69, "step": 13781 }, { "epoch": 2.0, "grad_norm": 5.945732116699219, "learning_rate": 5.385790144304826e-07, "loss": 0.4941, "step": 13782 }, { "epoch": 2.0, "grad_norm": 6.835258960723877, "learning_rate": 5.38438594864458e-07, "loss": 0.4976, "step": 13783 }, { "epoch": 2.0, "grad_norm": 6.902457237243652, "learning_rate": 5.382981868621046e-07, "loss": 0.476, "step": 13784 }, { "epoch": 2.0, "grad_norm": 6.732838153839111, "learning_rate": 5.381577904269398e-07, "loss": 0.3454, "step": 13785 }, { "epoch": 2.0, "grad_norm": 6.366628646850586, "learning_rate": 5.380174055624812e-07, "loss": 0.3702, "step": 13786 }, { "epoch": 2.0, "grad_norm": 6.740590572357178, "learning_rate": 5.378770322722459e-07, "loss": 0.3835, "step": 13787 }, { "epoch": 2.0, "grad_norm": 7.241703510284424, "learning_rate": 5.377366705597506e-07, "loss": 0.3973, "step": 13788 }, { "epoch": 2.0, "grad_norm": 7.245478630065918, "learning_rate": 5.375963204285119e-07, "loss": 0.3881, "step": 13789 }, { "epoch": 2.0, "grad_norm": 6.8067803382873535, "learning_rate": 5.374559818820466e-07, "loss": 0.4044, "step": 13790 }, { "epoch": 2.0, "grad_norm": 6.630882740020752, "learning_rate": 5.373156549238692e-07, "loss": 0.3885, "step": 13791 }, { "epoch": 2.0, "grad_norm": 6.356083869934082, "learning_rate": 5.371753395574972e-07, "loss": 0.388, "step": 13792 }, { "epoch": 2.0, "grad_norm": 6.240586757659912, "learning_rate": 5.370350357864442e-07, "loss": 0.3441, "step": 13793 }, { "epoch": 2.0, "grad_norm": 6.976211071014404, "learning_rate": 5.368947436142268e-07, "loss": 0.3997, "step": 13794 }, { "epoch": 2.0, "grad_norm": 6.138167381286621, "learning_rate": 5.367544630443585e-07, "loss": 0.3179, "step": 13795 }, { "epoch": 2.0, "grad_norm": 6.724185943603516, "learning_rate": 5.366141940803552e-07, "loss": 0.3872, "step": 13796 }, { "epoch": 2.0, "grad_norm": 7.890953540802002, "learning_rate": 5.364739367257299e-07, "loss": 0.4337, "step": 13797 }, { "epoch": 2.0, "grad_norm": 7.779307842254639, "learning_rate": 5.36333690983997e-07, "loss": 0.3798, "step": 13798 }, { "epoch": 2.0, "grad_norm": 7.528775691986084, "learning_rate": 5.361934568586701e-07, "loss": 0.4269, "step": 13799 }, { "epoch": 2.0, "grad_norm": 6.7392144203186035, "learning_rate": 5.360532343532624e-07, "loss": 0.3358, "step": 13800 }, { "epoch": 2.0, "grad_norm": 7.229299545288086, "learning_rate": 5.359130234712873e-07, "loss": 0.3563, "step": 13801 }, { "epoch": 2.0, "grad_norm": 7.523636817932129, "learning_rate": 5.357728242162572e-07, "loss": 0.315, "step": 13802 }, { "epoch": 2.0, "grad_norm": 8.323677062988281, "learning_rate": 5.356326365916848e-07, "loss": 0.471, "step": 13803 }, { "epoch": 2.0, "grad_norm": 7.124406337738037, "learning_rate": 5.35492460601082e-07, "loss": 0.352, "step": 13804 }, { "epoch": 2.0, "grad_norm": 7.953540802001953, "learning_rate": 5.353522962479611e-07, "loss": 0.3794, "step": 13805 }, { "epoch": 2.0, "grad_norm": 8.091653823852539, "learning_rate": 5.352121435358337e-07, "loss": 0.451, "step": 13806 }, { "epoch": 2.0, "grad_norm": 7.084036350250244, "learning_rate": 5.350720024682099e-07, "loss": 0.3275, "step": 13807 }, { "epoch": 2.0, "grad_norm": 8.743040084838867, "learning_rate": 5.349318730486023e-07, "loss": 0.3229, "step": 13808 }, { "epoch": 2.0, "grad_norm": 7.551209926605225, "learning_rate": 5.347917552805208e-07, "loss": 0.3803, "step": 13809 }, { "epoch": 2.0, "grad_norm": 6.785894870758057, "learning_rate": 5.346516491674761e-07, "loss": 0.3653, "step": 13810 }, { "epoch": 2.0, "grad_norm": 7.459933757781982, "learning_rate": 5.345115547129786e-07, "loss": 0.3997, "step": 13811 }, { "epoch": 2.0, "grad_norm": 8.232168197631836, "learning_rate": 5.343714719205369e-07, "loss": 0.4593, "step": 13812 }, { "epoch": 2.0, "grad_norm": 7.229238510131836, "learning_rate": 5.342314007936621e-07, "loss": 0.3795, "step": 13813 }, { "epoch": 2.0, "grad_norm": 7.123932838439941, "learning_rate": 5.340913413358621e-07, "loss": 0.3513, "step": 13814 }, { "epoch": 2.0, "grad_norm": 7.337430477142334, "learning_rate": 5.339512935506473e-07, "loss": 0.3868, "step": 13815 }, { "epoch": 2.0, "grad_norm": 6.394808769226074, "learning_rate": 5.338112574415252e-07, "loss": 0.3522, "step": 13816 }, { "epoch": 2.0, "grad_norm": 6.6477484703063965, "learning_rate": 5.336712330120047e-07, "loss": 0.3335, "step": 13817 }, { "epoch": 2.0, "grad_norm": 7.322690963745117, "learning_rate": 5.335312202655934e-07, "loss": 0.4101, "step": 13818 }, { "epoch": 2.01, "grad_norm": 7.153505325317383, "learning_rate": 5.333912192057999e-07, "loss": 0.379, "step": 13819 }, { "epoch": 2.01, "grad_norm": 6.906675815582275, "learning_rate": 5.332512298361309e-07, "loss": 0.359, "step": 13820 }, { "epoch": 2.01, "grad_norm": 7.006896495819092, "learning_rate": 5.331112521600941e-07, "loss": 0.3434, "step": 13821 }, { "epoch": 2.01, "grad_norm": 6.6774749755859375, "learning_rate": 5.329712861811962e-07, "loss": 0.3504, "step": 13822 }, { "epoch": 2.01, "grad_norm": 6.773509979248047, "learning_rate": 5.32831331902944e-07, "loss": 0.3395, "step": 13823 }, { "epoch": 2.01, "grad_norm": 7.811430931091309, "learning_rate": 5.326913893288435e-07, "loss": 0.3612, "step": 13824 }, { "epoch": 2.01, "grad_norm": 8.123239517211914, "learning_rate": 5.325514584624014e-07, "loss": 0.3763, "step": 13825 }, { "epoch": 2.01, "grad_norm": 7.594934463500977, "learning_rate": 5.324115393071223e-07, "loss": 0.3485, "step": 13826 }, { "epoch": 2.01, "grad_norm": 7.067514419555664, "learning_rate": 5.32271631866513e-07, "loss": 0.3468, "step": 13827 }, { "epoch": 2.01, "grad_norm": 8.095060348510742, "learning_rate": 5.321317361440773e-07, "loss": 0.4269, "step": 13828 }, { "epoch": 2.01, "grad_norm": 7.367310047149658, "learning_rate": 5.319918521433215e-07, "loss": 0.3374, "step": 13829 }, { "epoch": 2.01, "grad_norm": 7.583829879760742, "learning_rate": 5.318519798677486e-07, "loss": 0.3742, "step": 13830 }, { "epoch": 2.01, "grad_norm": 8.439199447631836, "learning_rate": 5.317121193208646e-07, "loss": 0.43, "step": 13831 }, { "epoch": 2.01, "grad_norm": 7.293582439422607, "learning_rate": 5.315722705061721e-07, "loss": 0.307, "step": 13832 }, { "epoch": 2.01, "grad_norm": 8.158980369567871, "learning_rate": 5.314324334271752e-07, "loss": 0.3516, "step": 13833 }, { "epoch": 2.01, "grad_norm": 7.44420051574707, "learning_rate": 5.312926080873772e-07, "loss": 0.3873, "step": 13834 }, { "epoch": 2.01, "grad_norm": 6.6267781257629395, "learning_rate": 5.311527944902812e-07, "loss": 0.333, "step": 13835 }, { "epoch": 2.01, "grad_norm": 8.446945190429688, "learning_rate": 5.310129926393908e-07, "loss": 0.3744, "step": 13836 }, { "epoch": 2.01, "grad_norm": 7.250885963439941, "learning_rate": 5.308732025382074e-07, "loss": 0.3531, "step": 13837 }, { "epoch": 2.01, "grad_norm": 6.94489049911499, "learning_rate": 5.307334241902337e-07, "loss": 0.3126, "step": 13838 }, { "epoch": 2.01, "grad_norm": 8.120640754699707, "learning_rate": 5.305936575989715e-07, "loss": 0.3442, "step": 13839 }, { "epoch": 2.01, "grad_norm": 7.203439712524414, "learning_rate": 5.304539027679227e-07, "loss": 0.3377, "step": 13840 }, { "epoch": 2.01, "grad_norm": 7.236271381378174, "learning_rate": 5.303141597005885e-07, "loss": 0.2955, "step": 13841 }, { "epoch": 2.01, "grad_norm": 7.562744140625, "learning_rate": 5.301744284004697e-07, "loss": 0.3668, "step": 13842 }, { "epoch": 2.01, "grad_norm": 7.573709964752197, "learning_rate": 5.30034708871067e-07, "loss": 0.3482, "step": 13843 }, { "epoch": 2.01, "grad_norm": 8.238250732421875, "learning_rate": 5.298950011158814e-07, "loss": 0.4198, "step": 13844 }, { "epoch": 2.01, "grad_norm": 7.5864996910095215, "learning_rate": 5.297553051384125e-07, "loss": 0.3601, "step": 13845 }, { "epoch": 2.01, "grad_norm": 7.073519706726074, "learning_rate": 5.29615620942161e-07, "loss": 0.3652, "step": 13846 }, { "epoch": 2.01, "grad_norm": 6.615802764892578, "learning_rate": 5.294759485306248e-07, "loss": 0.3344, "step": 13847 }, { "epoch": 2.01, "grad_norm": 7.64975643157959, "learning_rate": 5.29336287907305e-07, "loss": 0.4015, "step": 13848 }, { "epoch": 2.01, "grad_norm": 8.24319076538086, "learning_rate": 5.291966390756991e-07, "loss": 0.3143, "step": 13849 }, { "epoch": 2.01, "grad_norm": 7.119041919708252, "learning_rate": 5.29057002039307e-07, "loss": 0.3464, "step": 13850 }, { "epoch": 2.01, "grad_norm": 7.88205623626709, "learning_rate": 5.28917376801626e-07, "loss": 0.3462, "step": 13851 }, { "epoch": 2.01, "grad_norm": 8.044456481933594, "learning_rate": 5.287777633661552e-07, "loss": 0.3731, "step": 13852 }, { "epoch": 2.01, "grad_norm": 6.98882532119751, "learning_rate": 5.286381617363918e-07, "loss": 0.3097, "step": 13853 }, { "epoch": 2.01, "grad_norm": 8.657674789428711, "learning_rate": 5.284985719158331e-07, "loss": 0.4086, "step": 13854 }, { "epoch": 2.01, "grad_norm": 8.771224975585938, "learning_rate": 5.283589939079767e-07, "loss": 0.4331, "step": 13855 }, { "epoch": 2.01, "grad_norm": 7.125185012817383, "learning_rate": 5.282194277163194e-07, "loss": 0.3393, "step": 13856 }, { "epoch": 2.01, "grad_norm": 6.775949478149414, "learning_rate": 5.280798733443577e-07, "loss": 0.3543, "step": 13857 }, { "epoch": 2.01, "grad_norm": 6.590304374694824, "learning_rate": 5.279403307955881e-07, "loss": 0.3248, "step": 13858 }, { "epoch": 2.01, "grad_norm": 7.732834339141846, "learning_rate": 5.278008000735064e-07, "loss": 0.3736, "step": 13859 }, { "epoch": 2.01, "grad_norm": 8.342225074768066, "learning_rate": 5.276612811816083e-07, "loss": 0.438, "step": 13860 }, { "epoch": 2.01, "grad_norm": 7.86206579208374, "learning_rate": 5.275217741233895e-07, "loss": 0.3846, "step": 13861 }, { "epoch": 2.01, "grad_norm": 7.129465579986572, "learning_rate": 5.273822789023448e-07, "loss": 0.3704, "step": 13862 }, { "epoch": 2.01, "grad_norm": 8.085043907165527, "learning_rate": 5.272427955219694e-07, "loss": 0.4055, "step": 13863 }, { "epoch": 2.01, "grad_norm": 6.970576286315918, "learning_rate": 5.271033239857574e-07, "loss": 0.3187, "step": 13864 }, { "epoch": 2.01, "grad_norm": 8.005825996398926, "learning_rate": 5.269638642972034e-07, "loss": 0.3765, "step": 13865 }, { "epoch": 2.01, "grad_norm": 7.574260234832764, "learning_rate": 5.268244164598012e-07, "loss": 0.3544, "step": 13866 }, { "epoch": 2.01, "grad_norm": 7.292708873748779, "learning_rate": 5.266849804770445e-07, "loss": 0.3865, "step": 13867 }, { "epoch": 2.01, "grad_norm": 7.022526741027832, "learning_rate": 5.265455563524261e-07, "loss": 0.3399, "step": 13868 }, { "epoch": 2.01, "grad_norm": 7.486403465270996, "learning_rate": 5.264061440894403e-07, "loss": 0.3305, "step": 13869 }, { "epoch": 2.01, "grad_norm": 8.083440780639648, "learning_rate": 5.262667436915781e-07, "loss": 0.3689, "step": 13870 }, { "epoch": 2.01, "grad_norm": 7.290388584136963, "learning_rate": 5.261273551623337e-07, "loss": 0.3535, "step": 13871 }, { "epoch": 2.01, "grad_norm": 8.384207725524902, "learning_rate": 5.259879785051981e-07, "loss": 0.3736, "step": 13872 }, { "epoch": 2.01, "grad_norm": 7.552250385284424, "learning_rate": 5.258486137236635e-07, "loss": 0.3666, "step": 13873 }, { "epoch": 2.01, "grad_norm": 8.709175109863281, "learning_rate": 5.257092608212214e-07, "loss": 0.3974, "step": 13874 }, { "epoch": 2.01, "grad_norm": 6.867166042327881, "learning_rate": 5.255699198013632e-07, "loss": 0.3258, "step": 13875 }, { "epoch": 2.01, "grad_norm": 8.114363670349121, "learning_rate": 5.254305906675798e-07, "loss": 0.3424, "step": 13876 }, { "epoch": 2.01, "grad_norm": 7.556342601776123, "learning_rate": 5.252912734233619e-07, "loss": 0.3181, "step": 13877 }, { "epoch": 2.01, "grad_norm": 8.078083038330078, "learning_rate": 5.251519680721998e-07, "loss": 0.3614, "step": 13878 }, { "epoch": 2.01, "grad_norm": 7.161413669586182, "learning_rate": 5.250126746175836e-07, "loss": 0.311, "step": 13879 }, { "epoch": 2.01, "grad_norm": 7.516450881958008, "learning_rate": 5.248733930630029e-07, "loss": 0.3853, "step": 13880 }, { "epoch": 2.01, "grad_norm": 7.61683464050293, "learning_rate": 5.247341234119481e-07, "loss": 0.3892, "step": 13881 }, { "epoch": 2.01, "grad_norm": 7.482606410980225, "learning_rate": 5.245948656679065e-07, "loss": 0.323, "step": 13882 }, { "epoch": 2.01, "grad_norm": 6.6087260246276855, "learning_rate": 5.244556198343688e-07, "loss": 0.3029, "step": 13883 }, { "epoch": 2.01, "grad_norm": 7.445152759552002, "learning_rate": 5.243163859148223e-07, "loss": 0.334, "step": 13884 }, { "epoch": 2.01, "grad_norm": 7.86255407333374, "learning_rate": 5.241771639127566e-07, "loss": 0.395, "step": 13885 }, { "epoch": 2.01, "grad_norm": 8.04928207397461, "learning_rate": 5.240379538316581e-07, "loss": 0.3697, "step": 13886 }, { "epoch": 2.01, "grad_norm": 7.231695652008057, "learning_rate": 5.238987556750157e-07, "loss": 0.3429, "step": 13887 }, { "epoch": 2.02, "grad_norm": 7.1951799392700195, "learning_rate": 5.237595694463169e-07, "loss": 0.3424, "step": 13888 }, { "epoch": 2.02, "grad_norm": 7.803397178649902, "learning_rate": 5.236203951490473e-07, "loss": 0.3841, "step": 13889 }, { "epoch": 2.02, "grad_norm": 7.495925426483154, "learning_rate": 5.234812327866956e-07, "loss": 0.2948, "step": 13890 }, { "epoch": 2.02, "grad_norm": 8.40273380279541, "learning_rate": 5.233420823627464e-07, "loss": 0.3679, "step": 13891 }, { "epoch": 2.02, "grad_norm": 7.30499267578125, "learning_rate": 5.232029438806878e-07, "loss": 0.3368, "step": 13892 }, { "epoch": 2.02, "grad_norm": 7.219565391540527, "learning_rate": 5.230638173440043e-07, "loss": 0.3578, "step": 13893 }, { "epoch": 2.02, "grad_norm": 7.912754535675049, "learning_rate": 5.229247027561818e-07, "loss": 0.3872, "step": 13894 }, { "epoch": 2.02, "grad_norm": 7.360992908477783, "learning_rate": 5.227856001207059e-07, "loss": 0.3517, "step": 13895 }, { "epoch": 2.02, "grad_norm": 7.847841739654541, "learning_rate": 5.226465094410613e-07, "loss": 0.3537, "step": 13896 }, { "epoch": 2.02, "grad_norm": 6.840582847595215, "learning_rate": 5.225074307207326e-07, "loss": 0.3469, "step": 13897 }, { "epoch": 2.02, "grad_norm": 8.485730171203613, "learning_rate": 5.223683639632048e-07, "loss": 0.3523, "step": 13898 }, { "epoch": 2.02, "grad_norm": 7.719122409820557, "learning_rate": 5.222293091719612e-07, "loss": 0.3676, "step": 13899 }, { "epoch": 2.02, "grad_norm": 7.275749683380127, "learning_rate": 5.220902663504861e-07, "loss": 0.343, "step": 13900 }, { "epoch": 2.02, "grad_norm": 8.340071678161621, "learning_rate": 5.219512355022629e-07, "loss": 0.4268, "step": 13901 }, { "epoch": 2.02, "grad_norm": 8.40505313873291, "learning_rate": 5.21812216630775e-07, "loss": 0.4102, "step": 13902 }, { "epoch": 2.02, "grad_norm": 7.695790767669678, "learning_rate": 5.216732097395045e-07, "loss": 0.3693, "step": 13903 }, { "epoch": 2.02, "grad_norm": 7.982128143310547, "learning_rate": 5.21534214831935e-07, "loss": 0.3698, "step": 13904 }, { "epoch": 2.02, "grad_norm": 7.402360916137695, "learning_rate": 5.213952319115478e-07, "loss": 0.3376, "step": 13905 }, { "epoch": 2.02, "grad_norm": 7.484516143798828, "learning_rate": 5.212562609818261e-07, "loss": 0.3725, "step": 13906 }, { "epoch": 2.02, "grad_norm": 8.96133804321289, "learning_rate": 5.211173020462505e-07, "loss": 0.3783, "step": 13907 }, { "epoch": 2.02, "grad_norm": 8.885583877563477, "learning_rate": 5.209783551083027e-07, "loss": 0.3841, "step": 13908 }, { "epoch": 2.02, "grad_norm": 7.190975666046143, "learning_rate": 5.20839420171464e-07, "loss": 0.3475, "step": 13909 }, { "epoch": 2.02, "grad_norm": 7.208652496337891, "learning_rate": 5.207004972392151e-07, "loss": 0.3621, "step": 13910 }, { "epoch": 2.02, "grad_norm": 7.638702392578125, "learning_rate": 5.205615863150364e-07, "loss": 0.3821, "step": 13911 }, { "epoch": 2.02, "grad_norm": 8.502365112304688, "learning_rate": 5.204226874024083e-07, "loss": 0.4194, "step": 13912 }, { "epoch": 2.02, "grad_norm": 7.149105548858643, "learning_rate": 5.202838005048103e-07, "loss": 0.3477, "step": 13913 }, { "epoch": 2.02, "grad_norm": 8.313471794128418, "learning_rate": 5.201449256257223e-07, "loss": 0.4439, "step": 13914 }, { "epoch": 2.02, "grad_norm": 8.335040092468262, "learning_rate": 5.200060627686236e-07, "loss": 0.3742, "step": 13915 }, { "epoch": 2.02, "grad_norm": 9.036474227905273, "learning_rate": 5.198672119369929e-07, "loss": 0.4072, "step": 13916 }, { "epoch": 2.02, "grad_norm": 7.35715389251709, "learning_rate": 5.197283731343091e-07, "loss": 0.2909, "step": 13917 }, { "epoch": 2.02, "grad_norm": 7.453573703765869, "learning_rate": 5.195895463640507e-07, "loss": 0.3638, "step": 13918 }, { "epoch": 2.02, "grad_norm": 8.287626266479492, "learning_rate": 5.194507316296957e-07, "loss": 0.3559, "step": 13919 }, { "epoch": 2.02, "grad_norm": 7.459197521209717, "learning_rate": 5.193119289347216e-07, "loss": 0.2944, "step": 13920 }, { "epoch": 2.02, "grad_norm": 9.011377334594727, "learning_rate": 5.191731382826062e-07, "loss": 0.4284, "step": 13921 }, { "epoch": 2.02, "grad_norm": 8.618413925170898, "learning_rate": 5.190343596768265e-07, "loss": 0.3956, "step": 13922 }, { "epoch": 2.02, "grad_norm": 7.879245281219482, "learning_rate": 5.188955931208599e-07, "loss": 0.3364, "step": 13923 }, { "epoch": 2.02, "grad_norm": 7.161075115203857, "learning_rate": 5.187568386181816e-07, "loss": 0.3558, "step": 13924 }, { "epoch": 2.02, "grad_norm": 7.703433990478516, "learning_rate": 5.186180961722697e-07, "loss": 0.3785, "step": 13925 }, { "epoch": 2.02, "grad_norm": 7.47227144241333, "learning_rate": 5.184793657865983e-07, "loss": 0.3528, "step": 13926 }, { "epoch": 2.02, "grad_norm": 6.484291076660156, "learning_rate": 5.183406474646449e-07, "loss": 0.3646, "step": 13927 }, { "epoch": 2.02, "grad_norm": 9.139532089233398, "learning_rate": 5.182019412098835e-07, "loss": 0.3176, "step": 13928 }, { "epoch": 2.02, "grad_norm": 7.339688301086426, "learning_rate": 5.180632470257898e-07, "loss": 0.3744, "step": 13929 }, { "epoch": 2.02, "grad_norm": 8.156030654907227, "learning_rate": 5.179245649158382e-07, "loss": 0.3763, "step": 13930 }, { "epoch": 2.02, "grad_norm": 8.372072219848633, "learning_rate": 5.177858948835032e-07, "loss": 0.3878, "step": 13931 }, { "epoch": 2.02, "grad_norm": 7.862950801849365, "learning_rate": 5.176472369322593e-07, "loss": 0.3333, "step": 13932 }, { "epoch": 2.02, "grad_norm": 8.169745445251465, "learning_rate": 5.175085910655801e-07, "loss": 0.3861, "step": 13933 }, { "epoch": 2.02, "grad_norm": 8.192878723144531, "learning_rate": 5.173699572869392e-07, "loss": 0.3872, "step": 13934 }, { "epoch": 2.02, "grad_norm": 8.153976440429688, "learning_rate": 5.172313355998099e-07, "loss": 0.4073, "step": 13935 }, { "epoch": 2.02, "grad_norm": 7.824161052703857, "learning_rate": 5.17092726007665e-07, "loss": 0.3904, "step": 13936 }, { "epoch": 2.02, "grad_norm": 8.377603530883789, "learning_rate": 5.169541285139776e-07, "loss": 0.3677, "step": 13937 }, { "epoch": 2.02, "grad_norm": 7.167860507965088, "learning_rate": 5.168155431222187e-07, "loss": 0.34, "step": 13938 }, { "epoch": 2.02, "grad_norm": 8.057345390319824, "learning_rate": 5.166769698358618e-07, "loss": 0.3604, "step": 13939 }, { "epoch": 2.02, "grad_norm": 7.172343730926514, "learning_rate": 5.165384086583782e-07, "loss": 0.3313, "step": 13940 }, { "epoch": 2.02, "grad_norm": 7.925576686859131, "learning_rate": 5.163998595932391e-07, "loss": 0.3644, "step": 13941 }, { "epoch": 2.02, "grad_norm": 7.588109493255615, "learning_rate": 5.162613226439158e-07, "loss": 0.3029, "step": 13942 }, { "epoch": 2.02, "grad_norm": 8.090328216552734, "learning_rate": 5.161227978138792e-07, "loss": 0.3854, "step": 13943 }, { "epoch": 2.02, "grad_norm": 6.522777080535889, "learning_rate": 5.159842851065998e-07, "loss": 0.2966, "step": 13944 }, { "epoch": 2.02, "grad_norm": 8.28768539428711, "learning_rate": 5.158457845255471e-07, "loss": 0.4126, "step": 13945 }, { "epoch": 2.02, "grad_norm": 8.060667037963867, "learning_rate": 5.157072960741922e-07, "loss": 0.393, "step": 13946 }, { "epoch": 2.02, "grad_norm": 10.035683631896973, "learning_rate": 5.155688197560035e-07, "loss": 0.3968, "step": 13947 }, { "epoch": 2.02, "grad_norm": 7.8728718757629395, "learning_rate": 5.154303555744517e-07, "loss": 0.3865, "step": 13948 }, { "epoch": 2.02, "grad_norm": 8.25551700592041, "learning_rate": 5.152919035330045e-07, "loss": 0.3759, "step": 13949 }, { "epoch": 2.02, "grad_norm": 8.252021789550781, "learning_rate": 5.151534636351312e-07, "loss": 0.403, "step": 13950 }, { "epoch": 2.02, "grad_norm": 8.440979957580566, "learning_rate": 5.150150358842999e-07, "loss": 0.351, "step": 13951 }, { "epoch": 2.02, "grad_norm": 8.924795150756836, "learning_rate": 5.14876620283979e-07, "loss": 0.3509, "step": 13952 }, { "epoch": 2.02, "grad_norm": 6.74432373046875, "learning_rate": 5.147382168376361e-07, "loss": 0.3036, "step": 13953 }, { "epoch": 2.02, "grad_norm": 7.532063007354736, "learning_rate": 5.145998255487387e-07, "loss": 0.3589, "step": 13954 }, { "epoch": 2.02, "grad_norm": 8.642976760864258, "learning_rate": 5.14461446420754e-07, "loss": 0.4194, "step": 13955 }, { "epoch": 2.02, "grad_norm": 7.648402690887451, "learning_rate": 5.143230794571488e-07, "loss": 0.3833, "step": 13956 }, { "epoch": 2.03, "grad_norm": 9.963531494140625, "learning_rate": 5.141847246613897e-07, "loss": 0.4436, "step": 13957 }, { "epoch": 2.03, "grad_norm": 7.46907901763916, "learning_rate": 5.140463820369434e-07, "loss": 0.3543, "step": 13958 }, { "epoch": 2.03, "grad_norm": 8.638153076171875, "learning_rate": 5.139080515872747e-07, "loss": 0.4166, "step": 13959 }, { "epoch": 2.03, "grad_norm": 7.045699596405029, "learning_rate": 5.137697333158506e-07, "loss": 0.3525, "step": 13960 }, { "epoch": 2.03, "grad_norm": 8.595016479492188, "learning_rate": 5.13631427226135e-07, "loss": 0.4534, "step": 13961 }, { "epoch": 2.03, "grad_norm": 7.991994857788086, "learning_rate": 5.134931333215946e-07, "loss": 0.3886, "step": 13962 }, { "epoch": 2.03, "grad_norm": 7.127673625946045, "learning_rate": 5.133548516056928e-07, "loss": 0.3241, "step": 13963 }, { "epoch": 2.03, "grad_norm": 8.963996887207031, "learning_rate": 5.132165820818946e-07, "loss": 0.4364, "step": 13964 }, { "epoch": 2.03, "grad_norm": 8.030913352966309, "learning_rate": 5.130783247536638e-07, "loss": 0.3046, "step": 13965 }, { "epoch": 2.03, "grad_norm": 7.943405628204346, "learning_rate": 5.12940079624464e-07, "loss": 0.3506, "step": 13966 }, { "epoch": 2.03, "grad_norm": 8.548304557800293, "learning_rate": 5.128018466977601e-07, "loss": 0.372, "step": 13967 }, { "epoch": 2.03, "grad_norm": 7.430370807647705, "learning_rate": 5.126636259770134e-07, "loss": 0.3343, "step": 13968 }, { "epoch": 2.03, "grad_norm": 8.485600471496582, "learning_rate": 5.125254174656886e-07, "loss": 0.3939, "step": 13969 }, { "epoch": 2.03, "grad_norm": 7.858916759490967, "learning_rate": 5.123872211672468e-07, "loss": 0.3635, "step": 13970 }, { "epoch": 2.03, "grad_norm": 8.078964233398438, "learning_rate": 5.122490370851511e-07, "loss": 0.4155, "step": 13971 }, { "epoch": 2.03, "grad_norm": 7.62211275100708, "learning_rate": 5.121108652228633e-07, "loss": 0.2536, "step": 13972 }, { "epoch": 2.03, "grad_norm": 8.478761672973633, "learning_rate": 5.11972705583845e-07, "loss": 0.3647, "step": 13973 }, { "epoch": 2.03, "grad_norm": 6.92476224899292, "learning_rate": 5.118345581715575e-07, "loss": 0.2583, "step": 13974 }, { "epoch": 2.03, "grad_norm": 6.962411403656006, "learning_rate": 5.116964229894619e-07, "loss": 0.3768, "step": 13975 }, { "epoch": 2.03, "grad_norm": 8.0542631149292, "learning_rate": 5.115583000410191e-07, "loss": 0.3211, "step": 13976 }, { "epoch": 2.03, "grad_norm": 9.376029968261719, "learning_rate": 5.114201893296894e-07, "loss": 0.3993, "step": 13977 }, { "epoch": 2.03, "grad_norm": 7.501262187957764, "learning_rate": 5.112820908589331e-07, "loss": 0.3428, "step": 13978 }, { "epoch": 2.03, "grad_norm": 8.295281410217285, "learning_rate": 5.111440046322104e-07, "loss": 0.3558, "step": 13979 }, { "epoch": 2.03, "grad_norm": 7.4478254318237305, "learning_rate": 5.110059306529795e-07, "loss": 0.2862, "step": 13980 }, { "epoch": 2.03, "grad_norm": 7.297536849975586, "learning_rate": 5.108678689247012e-07, "loss": 0.3616, "step": 13981 }, { "epoch": 2.03, "grad_norm": 8.737295150756836, "learning_rate": 5.10729819450833e-07, "loss": 0.4535, "step": 13982 }, { "epoch": 2.03, "grad_norm": 7.740675926208496, "learning_rate": 5.105917822348347e-07, "loss": 0.3189, "step": 13983 }, { "epoch": 2.03, "grad_norm": 6.80889368057251, "learning_rate": 5.10453757280164e-07, "loss": 0.3739, "step": 13984 }, { "epoch": 2.03, "grad_norm": 7.481692790985107, "learning_rate": 5.103157445902788e-07, "loss": 0.3593, "step": 13985 }, { "epoch": 2.03, "grad_norm": 7.92197322845459, "learning_rate": 5.10177744168637e-07, "loss": 0.3224, "step": 13986 }, { "epoch": 2.03, "grad_norm": 8.664774894714355, "learning_rate": 5.100397560186961e-07, "loss": 0.3754, "step": 13987 }, { "epoch": 2.03, "grad_norm": 8.218579292297363, "learning_rate": 5.09901780143913e-07, "loss": 0.3746, "step": 13988 }, { "epoch": 2.03, "grad_norm": 8.934731483459473, "learning_rate": 5.097638165477442e-07, "loss": 0.3698, "step": 13989 }, { "epoch": 2.03, "grad_norm": 8.318309783935547, "learning_rate": 5.096258652336467e-07, "loss": 0.3071, "step": 13990 }, { "epoch": 2.03, "grad_norm": 9.548872947692871, "learning_rate": 5.094879262050762e-07, "loss": 0.3546, "step": 13991 }, { "epoch": 2.03, "grad_norm": 8.200082778930664, "learning_rate": 5.093499994654888e-07, "loss": 0.3395, "step": 13992 }, { "epoch": 2.03, "grad_norm": 7.876128196716309, "learning_rate": 5.092120850183399e-07, "loss": 0.3936, "step": 13993 }, { "epoch": 2.03, "grad_norm": 8.450935363769531, "learning_rate": 5.090741828670848e-07, "loss": 0.3467, "step": 13994 }, { "epoch": 2.03, "grad_norm": 8.057284355163574, "learning_rate": 5.089362930151784e-07, "loss": 0.3282, "step": 13995 }, { "epoch": 2.03, "grad_norm": 7.4467692375183105, "learning_rate": 5.087984154660752e-07, "loss": 0.3138, "step": 13996 }, { "epoch": 2.03, "grad_norm": 9.189349174499512, "learning_rate": 5.086605502232296e-07, "loss": 0.3989, "step": 13997 }, { "epoch": 2.03, "grad_norm": 8.3342866897583, "learning_rate": 5.08522697290096e-07, "loss": 0.3223, "step": 13998 }, { "epoch": 2.03, "grad_norm": 8.049604415893555, "learning_rate": 5.083848566701268e-07, "loss": 0.3336, "step": 13999 }, { "epoch": 2.03, "grad_norm": 8.559622764587402, "learning_rate": 5.082470283667771e-07, "loss": 0.392, "step": 14000 }, { "epoch": 2.03, "grad_norm": 7.980502605438232, "learning_rate": 5.081092123834983e-07, "loss": 0.3779, "step": 14001 }, { "epoch": 2.03, "grad_norm": 8.04265022277832, "learning_rate": 5.079714087237447e-07, "loss": 0.3251, "step": 14002 }, { "epoch": 2.03, "grad_norm": 8.950101852416992, "learning_rate": 5.078336173909672e-07, "loss": 0.4281, "step": 14003 }, { "epoch": 2.03, "grad_norm": 7.543115139007568, "learning_rate": 5.076958383886196e-07, "loss": 0.3598, "step": 14004 }, { "epoch": 2.03, "grad_norm": 8.216416358947754, "learning_rate": 5.075580717201525e-07, "loss": 0.3539, "step": 14005 }, { "epoch": 2.03, "grad_norm": 7.561665058135986, "learning_rate": 5.074203173890175e-07, "loss": 0.3898, "step": 14006 }, { "epoch": 2.03, "grad_norm": 7.041023254394531, "learning_rate": 5.072825753986662e-07, "loss": 0.3179, "step": 14007 }, { "epoch": 2.03, "grad_norm": 8.595738410949707, "learning_rate": 5.071448457525495e-07, "loss": 0.3661, "step": 14008 }, { "epoch": 2.03, "grad_norm": 8.039438247680664, "learning_rate": 5.07007128454118e-07, "loss": 0.3661, "step": 14009 }, { "epoch": 2.03, "grad_norm": 7.646783351898193, "learning_rate": 5.068694235068216e-07, "loss": 0.3931, "step": 14010 }, { "epoch": 2.03, "grad_norm": 7.019274711608887, "learning_rate": 5.067317309141106e-07, "loss": 0.313, "step": 14011 }, { "epoch": 2.03, "grad_norm": 8.04798412322998, "learning_rate": 5.065940506794347e-07, "loss": 0.3581, "step": 14012 }, { "epoch": 2.03, "grad_norm": 8.429670333862305, "learning_rate": 5.064563828062431e-07, "loss": 0.3941, "step": 14013 }, { "epoch": 2.03, "grad_norm": 8.241759300231934, "learning_rate": 5.063187272979854e-07, "loss": 0.335, "step": 14014 }, { "epoch": 2.03, "grad_norm": 7.301107883453369, "learning_rate": 5.061810841581089e-07, "loss": 0.346, "step": 14015 }, { "epoch": 2.03, "grad_norm": 7.357784271240234, "learning_rate": 5.060434533900638e-07, "loss": 0.3411, "step": 14016 }, { "epoch": 2.03, "grad_norm": 7.739006042480469, "learning_rate": 5.059058349972967e-07, "loss": 0.2815, "step": 14017 }, { "epoch": 2.03, "grad_norm": 8.512606620788574, "learning_rate": 5.057682289832563e-07, "loss": 0.3299, "step": 14018 }, { "epoch": 2.03, "grad_norm": 8.233138084411621, "learning_rate": 5.056306353513904e-07, "loss": 0.3738, "step": 14019 }, { "epoch": 2.03, "grad_norm": 8.282350540161133, "learning_rate": 5.054930541051447e-07, "loss": 0.3504, "step": 14020 }, { "epoch": 2.03, "grad_norm": 7.133728504180908, "learning_rate": 5.053554852479681e-07, "loss": 0.3546, "step": 14021 }, { "epoch": 2.03, "grad_norm": 8.3908052444458, "learning_rate": 5.052179287833051e-07, "loss": 0.3465, "step": 14022 }, { "epoch": 2.03, "grad_norm": 7.6935200691223145, "learning_rate": 5.050803847146039e-07, "loss": 0.3628, "step": 14023 }, { "epoch": 2.03, "grad_norm": 8.54587459564209, "learning_rate": 5.049428530453086e-07, "loss": 0.4376, "step": 14024 }, { "epoch": 2.03, "grad_norm": 6.58272123336792, "learning_rate": 5.048053337788667e-07, "loss": 0.3139, "step": 14025 }, { "epoch": 2.04, "grad_norm": 7.805581092834473, "learning_rate": 5.046678269187219e-07, "loss": 0.377, "step": 14026 }, { "epoch": 2.04, "grad_norm": 7.6098856925964355, "learning_rate": 5.045303324683201e-07, "loss": 0.378, "step": 14027 }, { "epoch": 2.04, "grad_norm": 7.169929504394531, "learning_rate": 5.043928504311057e-07, "loss": 0.3461, "step": 14028 }, { "epoch": 2.04, "grad_norm": 8.058558464050293, "learning_rate": 5.04255380810523e-07, "loss": 0.3736, "step": 14029 }, { "epoch": 2.04, "grad_norm": 7.617104530334473, "learning_rate": 5.041179236100164e-07, "loss": 0.3724, "step": 14030 }, { "epoch": 2.04, "grad_norm": 7.666420936584473, "learning_rate": 5.039804788330295e-07, "loss": 0.3326, "step": 14031 }, { "epoch": 2.04, "grad_norm": 7.081164360046387, "learning_rate": 5.038430464830056e-07, "loss": 0.378, "step": 14032 }, { "epoch": 2.04, "grad_norm": 6.183199405670166, "learning_rate": 5.037056265633881e-07, "loss": 0.2936, "step": 14033 }, { "epoch": 2.04, "grad_norm": 8.101204872131348, "learning_rate": 5.035682190776198e-07, "loss": 0.3779, "step": 14034 }, { "epoch": 2.04, "grad_norm": 8.040884971618652, "learning_rate": 5.034308240291434e-07, "loss": 0.3736, "step": 14035 }, { "epoch": 2.04, "grad_norm": 7.7340545654296875, "learning_rate": 5.032934414214003e-07, "loss": 0.3577, "step": 14036 }, { "epoch": 2.04, "grad_norm": 8.392641067504883, "learning_rate": 5.031560712578335e-07, "loss": 0.3216, "step": 14037 }, { "epoch": 2.04, "grad_norm": 8.608720779418945, "learning_rate": 5.030187135418834e-07, "loss": 0.3403, "step": 14038 }, { "epoch": 2.04, "grad_norm": 8.518077850341797, "learning_rate": 5.028813682769926e-07, "loss": 0.3764, "step": 14039 }, { "epoch": 2.04, "grad_norm": 9.931585311889648, "learning_rate": 5.027440354666012e-07, "loss": 0.4495, "step": 14040 }, { "epoch": 2.04, "grad_norm": 6.842650413513184, "learning_rate": 5.026067151141498e-07, "loss": 0.3561, "step": 14041 }, { "epoch": 2.04, "grad_norm": 8.115671157836914, "learning_rate": 5.024694072230792e-07, "loss": 0.3671, "step": 14042 }, { "epoch": 2.04, "grad_norm": 7.895916938781738, "learning_rate": 5.023321117968286e-07, "loss": 0.3589, "step": 14043 }, { "epoch": 2.04, "grad_norm": 7.633205413818359, "learning_rate": 5.021948288388392e-07, "loss": 0.3794, "step": 14044 }, { "epoch": 2.04, "grad_norm": 7.511226654052734, "learning_rate": 5.020575583525491e-07, "loss": 0.3724, "step": 14045 }, { "epoch": 2.04, "grad_norm": 9.430781364440918, "learning_rate": 5.019203003413979e-07, "loss": 0.4158, "step": 14046 }, { "epoch": 2.04, "grad_norm": 8.672290802001953, "learning_rate": 5.017830548088242e-07, "loss": 0.3428, "step": 14047 }, { "epoch": 2.04, "grad_norm": 7.810328960418701, "learning_rate": 5.016458217582665e-07, "loss": 0.3432, "step": 14048 }, { "epoch": 2.04, "grad_norm": 7.215529918670654, "learning_rate": 5.015086011931631e-07, "loss": 0.3819, "step": 14049 }, { "epoch": 2.04, "grad_norm": 8.425016403198242, "learning_rate": 5.013713931169516e-07, "loss": 0.3549, "step": 14050 }, { "epoch": 2.04, "grad_norm": 7.214433670043945, "learning_rate": 5.012341975330698e-07, "loss": 0.3273, "step": 14051 }, { "epoch": 2.04, "grad_norm": 6.8472065925598145, "learning_rate": 5.010970144449547e-07, "loss": 0.3138, "step": 14052 }, { "epoch": 2.04, "grad_norm": 7.301265716552734, "learning_rate": 5.009598438560431e-07, "loss": 0.3542, "step": 14053 }, { "epoch": 2.04, "grad_norm": 7.638596534729004, "learning_rate": 5.008226857697724e-07, "loss": 0.3953, "step": 14054 }, { "epoch": 2.04, "grad_norm": 8.139503479003906, "learning_rate": 5.006855401895774e-07, "loss": 0.3917, "step": 14055 }, { "epoch": 2.04, "grad_norm": 8.101356506347656, "learning_rate": 5.005484071188956e-07, "loss": 0.3615, "step": 14056 }, { "epoch": 2.04, "grad_norm": 7.9505934715271, "learning_rate": 5.004112865611613e-07, "loss": 0.3688, "step": 14057 }, { "epoch": 2.04, "grad_norm": 7.153537273406982, "learning_rate": 5.00274178519811e-07, "loss": 0.3235, "step": 14058 }, { "epoch": 2.04, "grad_norm": 7.597351551055908, "learning_rate": 5.001370829982788e-07, "loss": 0.34, "step": 14059 }, { "epoch": 2.04, "grad_norm": 8.634501457214355, "learning_rate": 5.000000000000002e-07, "loss": 0.3315, "step": 14060 }, { "epoch": 2.04, "grad_norm": 7.5536885261535645, "learning_rate": 4.998629295284089e-07, "loss": 0.3334, "step": 14061 }, { "epoch": 2.04, "grad_norm": 7.294126510620117, "learning_rate": 4.997258715869392e-07, "loss": 0.2978, "step": 14062 }, { "epoch": 2.04, "grad_norm": 8.997429847717285, "learning_rate": 4.995888261790251e-07, "loss": 0.4094, "step": 14063 }, { "epoch": 2.04, "grad_norm": 8.13675308227539, "learning_rate": 4.994517933080999e-07, "loss": 0.3558, "step": 14064 }, { "epoch": 2.04, "grad_norm": 8.90516185760498, "learning_rate": 4.993147729775964e-07, "loss": 0.3297, "step": 14065 }, { "epoch": 2.04, "grad_norm": 7.404720306396484, "learning_rate": 4.991777651909481e-07, "loss": 0.3306, "step": 14066 }, { "epoch": 2.04, "grad_norm": 7.6995463371276855, "learning_rate": 4.99040769951587e-07, "loss": 0.4015, "step": 14067 }, { "epoch": 2.04, "grad_norm": 8.970481872558594, "learning_rate": 4.989037872629454e-07, "loss": 0.3676, "step": 14068 }, { "epoch": 2.04, "grad_norm": 7.355550765991211, "learning_rate": 4.987668171284551e-07, "loss": 0.3885, "step": 14069 }, { "epoch": 2.04, "grad_norm": 7.329992771148682, "learning_rate": 4.98629859551548e-07, "loss": 0.3188, "step": 14070 }, { "epoch": 2.04, "grad_norm": 8.879352569580078, "learning_rate": 4.984929145356551e-07, "loss": 0.4201, "step": 14071 }, { "epoch": 2.04, "grad_norm": 9.217242240905762, "learning_rate": 4.983559820842073e-07, "loss": 0.3782, "step": 14072 }, { "epoch": 2.04, "grad_norm": 7.9494171142578125, "learning_rate": 4.982190622006352e-07, "loss": 0.3517, "step": 14073 }, { "epoch": 2.04, "grad_norm": 8.707802772521973, "learning_rate": 4.980821548883692e-07, "loss": 0.4197, "step": 14074 }, { "epoch": 2.04, "grad_norm": 8.248334884643555, "learning_rate": 4.979452601508398e-07, "loss": 0.3039, "step": 14075 }, { "epoch": 2.04, "grad_norm": 7.084409713745117, "learning_rate": 4.978083779914754e-07, "loss": 0.3548, "step": 14076 }, { "epoch": 2.04, "grad_norm": 8.033324241638184, "learning_rate": 4.976715084137068e-07, "loss": 0.3683, "step": 14077 }, { "epoch": 2.04, "grad_norm": 8.284417152404785, "learning_rate": 4.975346514209615e-07, "loss": 0.4235, "step": 14078 }, { "epoch": 2.04, "grad_norm": 9.542804718017578, "learning_rate": 4.9739780701667e-07, "loss": 0.4124, "step": 14079 }, { "epoch": 2.04, "grad_norm": 8.853075981140137, "learning_rate": 4.972609752042589e-07, "loss": 0.3759, "step": 14080 }, { "epoch": 2.04, "grad_norm": 8.286161422729492, "learning_rate": 4.971241559871581e-07, "loss": 0.4084, "step": 14081 }, { "epoch": 2.04, "grad_norm": 7.603381633758545, "learning_rate": 4.96987349368794e-07, "loss": 0.3507, "step": 14082 }, { "epoch": 2.04, "grad_norm": 7.39542293548584, "learning_rate": 4.968505553525947e-07, "loss": 0.3813, "step": 14083 }, { "epoch": 2.04, "grad_norm": 9.078370094299316, "learning_rate": 4.967137739419871e-07, "loss": 0.389, "step": 14084 }, { "epoch": 2.04, "grad_norm": 9.070240020751953, "learning_rate": 4.965770051403981e-07, "loss": 0.3567, "step": 14085 }, { "epoch": 2.04, "grad_norm": 7.433293342590332, "learning_rate": 4.964402489512543e-07, "loss": 0.3102, "step": 14086 }, { "epoch": 2.04, "grad_norm": 8.117212295532227, "learning_rate": 4.963035053779819e-07, "loss": 0.3957, "step": 14087 }, { "epoch": 2.04, "grad_norm": 7.704111576080322, "learning_rate": 4.961667744240068e-07, "loss": 0.3323, "step": 14088 }, { "epoch": 2.04, "grad_norm": 9.556930541992188, "learning_rate": 4.960300560927547e-07, "loss": 0.4121, "step": 14089 }, { "epoch": 2.04, "grad_norm": 6.847438812255859, "learning_rate": 4.958933503876501e-07, "loss": 0.3175, "step": 14090 }, { "epoch": 2.04, "grad_norm": 8.146403312683105, "learning_rate": 4.957566573121192e-07, "loss": 0.3894, "step": 14091 }, { "epoch": 2.04, "grad_norm": 7.618268013000488, "learning_rate": 4.956199768695852e-07, "loss": 0.3325, "step": 14092 }, { "epoch": 2.04, "grad_norm": 9.005361557006836, "learning_rate": 4.95483309063474e-07, "loss": 0.4223, "step": 14093 }, { "epoch": 2.04, "grad_norm": 7.782652854919434, "learning_rate": 4.953466538972079e-07, "loss": 0.3863, "step": 14094 }, { "epoch": 2.05, "grad_norm": 8.153862953186035, "learning_rate": 4.952100113742117e-07, "loss": 0.3862, "step": 14095 }, { "epoch": 2.05, "grad_norm": 8.562944412231445, "learning_rate": 4.95073381497909e-07, "loss": 0.3835, "step": 14096 }, { "epoch": 2.05, "grad_norm": 7.789132118225098, "learning_rate": 4.949367642717215e-07, "loss": 0.3761, "step": 14097 }, { "epoch": 2.05, "grad_norm": 8.111475944519043, "learning_rate": 4.948001596990734e-07, "loss": 0.3719, "step": 14098 }, { "epoch": 2.05, "grad_norm": 10.32861614227295, "learning_rate": 4.946635677833858e-07, "loss": 0.4261, "step": 14099 }, { "epoch": 2.05, "grad_norm": 8.136153221130371, "learning_rate": 4.94526988528082e-07, "loss": 0.3928, "step": 14100 }, { "epoch": 2.05, "grad_norm": 7.25597620010376, "learning_rate": 4.943904219365831e-07, "loss": 0.3542, "step": 14101 }, { "epoch": 2.05, "grad_norm": 7.725212097167969, "learning_rate": 4.942538680123105e-07, "loss": 0.3152, "step": 14102 }, { "epoch": 2.05, "grad_norm": 8.195236206054688, "learning_rate": 4.941173267586854e-07, "loss": 0.3253, "step": 14103 }, { "epoch": 2.05, "grad_norm": 8.24344539642334, "learning_rate": 4.939807981791289e-07, "loss": 0.3809, "step": 14104 }, { "epoch": 2.05, "grad_norm": 7.600449562072754, "learning_rate": 4.938442822770611e-07, "loss": 0.3722, "step": 14105 }, { "epoch": 2.05, "grad_norm": 9.375909805297852, "learning_rate": 4.937077790559025e-07, "loss": 0.3645, "step": 14106 }, { "epoch": 2.05, "grad_norm": 7.599506855010986, "learning_rate": 4.935712885190729e-07, "loss": 0.37, "step": 14107 }, { "epoch": 2.05, "grad_norm": 7.746842384338379, "learning_rate": 4.934348106699917e-07, "loss": 0.3203, "step": 14108 }, { "epoch": 2.05, "grad_norm": 9.91453742980957, "learning_rate": 4.932983455120783e-07, "loss": 0.4074, "step": 14109 }, { "epoch": 2.05, "grad_norm": 7.692968368530273, "learning_rate": 4.931618930487519e-07, "loss": 0.3321, "step": 14110 }, { "epoch": 2.05, "grad_norm": 8.296586990356445, "learning_rate": 4.930254532834299e-07, "loss": 0.3764, "step": 14111 }, { "epoch": 2.05, "grad_norm": 7.544870853424072, "learning_rate": 4.928890262195323e-07, "loss": 0.2936, "step": 14112 }, { "epoch": 2.05, "grad_norm": 8.04346752166748, "learning_rate": 4.927526118604755e-07, "loss": 0.3621, "step": 14113 }, { "epoch": 2.05, "grad_norm": 9.293146133422852, "learning_rate": 4.926162102096784e-07, "loss": 0.3633, "step": 14114 }, { "epoch": 2.05, "grad_norm": 9.570271492004395, "learning_rate": 4.924798212705571e-07, "loss": 0.4253, "step": 14115 }, { "epoch": 2.05, "grad_norm": 7.95360803604126, "learning_rate": 4.923434450465299e-07, "loss": 0.3524, "step": 14116 }, { "epoch": 2.05, "grad_norm": 9.448090553283691, "learning_rate": 4.922070815410125e-07, "loss": 0.4481, "step": 14117 }, { "epoch": 2.05, "grad_norm": 7.305377960205078, "learning_rate": 4.920707307574217e-07, "loss": 0.3579, "step": 14118 }, { "epoch": 2.05, "grad_norm": 8.536608695983887, "learning_rate": 4.919343926991733e-07, "loss": 0.4209, "step": 14119 }, { "epoch": 2.05, "grad_norm": 8.156793594360352, "learning_rate": 4.917980673696832e-07, "loss": 0.3886, "step": 14120 }, { "epoch": 2.05, "grad_norm": 7.068417549133301, "learning_rate": 4.916617547723668e-07, "loss": 0.2987, "step": 14121 }, { "epoch": 2.05, "grad_norm": 7.204789161682129, "learning_rate": 4.915254549106393e-07, "loss": 0.375, "step": 14122 }, { "epoch": 2.05, "grad_norm": 7.847070693969727, "learning_rate": 4.913891677879152e-07, "loss": 0.3684, "step": 14123 }, { "epoch": 2.05, "grad_norm": 8.660595893859863, "learning_rate": 4.912528934076091e-07, "loss": 0.3717, "step": 14124 }, { "epoch": 2.05, "grad_norm": 9.160528182983398, "learning_rate": 4.911166317731351e-07, "loss": 0.3954, "step": 14125 }, { "epoch": 2.05, "grad_norm": 8.365643501281738, "learning_rate": 4.909803828879071e-07, "loss": 0.3668, "step": 14126 }, { "epoch": 2.05, "grad_norm": 8.523250579833984, "learning_rate": 4.908441467553384e-07, "loss": 0.3296, "step": 14127 }, { "epoch": 2.05, "grad_norm": 7.81678581237793, "learning_rate": 4.907079233788424e-07, "loss": 0.3274, "step": 14128 }, { "epoch": 2.05, "grad_norm": 8.341036796569824, "learning_rate": 4.905717127618321e-07, "loss": 0.4175, "step": 14129 }, { "epoch": 2.05, "grad_norm": 8.207213401794434, "learning_rate": 4.904355149077195e-07, "loss": 0.3815, "step": 14130 }, { "epoch": 2.05, "grad_norm": 7.176718235015869, "learning_rate": 4.902993298199176e-07, "loss": 0.3433, "step": 14131 }, { "epoch": 2.05, "grad_norm": 7.917751789093018, "learning_rate": 4.901631575018371e-07, "loss": 0.3212, "step": 14132 }, { "epoch": 2.05, "grad_norm": 7.578814506530762, "learning_rate": 4.90026997956891e-07, "loss": 0.3363, "step": 14133 }, { "epoch": 2.05, "grad_norm": 8.09982967376709, "learning_rate": 4.898908511884891e-07, "loss": 0.4378, "step": 14134 }, { "epoch": 2.05, "grad_norm": 6.835425853729248, "learning_rate": 4.897547172000438e-07, "loss": 0.2876, "step": 14135 }, { "epoch": 2.05, "grad_norm": 7.107482433319092, "learning_rate": 4.896185959949647e-07, "loss": 0.3062, "step": 14136 }, { "epoch": 2.05, "grad_norm": 7.090035915374756, "learning_rate": 4.894824875766623e-07, "loss": 0.3011, "step": 14137 }, { "epoch": 2.05, "grad_norm": 7.90765905380249, "learning_rate": 4.893463919485467e-07, "loss": 0.3282, "step": 14138 }, { "epoch": 2.05, "grad_norm": 8.413500785827637, "learning_rate": 4.892103091140276e-07, "loss": 0.3431, "step": 14139 }, { "epoch": 2.05, "grad_norm": 7.590681552886963, "learning_rate": 4.890742390765141e-07, "loss": 0.3503, "step": 14140 }, { "epoch": 2.05, "grad_norm": 7.349350452423096, "learning_rate": 4.889381818394156e-07, "loss": 0.3745, "step": 14141 }, { "epoch": 2.05, "grad_norm": 8.662604331970215, "learning_rate": 4.888021374061404e-07, "loss": 0.3662, "step": 14142 }, { "epoch": 2.05, "grad_norm": 7.676225185394287, "learning_rate": 4.886661057800971e-07, "loss": 0.36, "step": 14143 }, { "epoch": 2.05, "grad_norm": 8.863238334655762, "learning_rate": 4.885300869646937e-07, "loss": 0.4055, "step": 14144 }, { "epoch": 2.05, "grad_norm": 8.758094787597656, "learning_rate": 4.883940809633383e-07, "loss": 0.3481, "step": 14145 }, { "epoch": 2.05, "grad_norm": 9.22331714630127, "learning_rate": 4.882580877794371e-07, "loss": 0.3836, "step": 14146 }, { "epoch": 2.05, "grad_norm": 9.02863883972168, "learning_rate": 4.881221074163988e-07, "loss": 0.4123, "step": 14147 }, { "epoch": 2.05, "grad_norm": 8.786596298217773, "learning_rate": 4.879861398776286e-07, "loss": 0.4224, "step": 14148 }, { "epoch": 2.05, "grad_norm": 7.4014434814453125, "learning_rate": 4.878501851665342e-07, "loss": 0.3455, "step": 14149 }, { "epoch": 2.05, "grad_norm": 7.345860958099365, "learning_rate": 4.877142432865212e-07, "loss": 0.348, "step": 14150 }, { "epoch": 2.05, "grad_norm": 8.305791854858398, "learning_rate": 4.875783142409955e-07, "loss": 0.4044, "step": 14151 }, { "epoch": 2.05, "grad_norm": 8.33195972442627, "learning_rate": 4.874423980333629e-07, "loss": 0.3377, "step": 14152 }, { "epoch": 2.05, "grad_norm": 7.7554192543029785, "learning_rate": 4.873064946670274e-07, "loss": 0.3784, "step": 14153 }, { "epoch": 2.05, "grad_norm": 7.603103160858154, "learning_rate": 4.871706041453954e-07, "loss": 0.4004, "step": 14154 }, { "epoch": 2.05, "grad_norm": 8.465290069580078, "learning_rate": 4.870347264718698e-07, "loss": 0.3529, "step": 14155 }, { "epoch": 2.05, "grad_norm": 7.831790447235107, "learning_rate": 4.868988616498566e-07, "loss": 0.3403, "step": 14156 }, { "epoch": 2.05, "grad_norm": 7.568166255950928, "learning_rate": 4.867630096827583e-07, "loss": 0.3072, "step": 14157 }, { "epoch": 2.05, "grad_norm": 8.359049797058105, "learning_rate": 4.866271705739789e-07, "loss": 0.3465, "step": 14158 }, { "epoch": 2.05, "grad_norm": 8.121703147888184, "learning_rate": 4.864913443269214e-07, "loss": 0.3606, "step": 14159 }, { "epoch": 2.05, "grad_norm": 7.174234867095947, "learning_rate": 4.863555309449891e-07, "loss": 0.3545, "step": 14160 }, { "epoch": 2.05, "grad_norm": 7.28997278213501, "learning_rate": 4.862197304315842e-07, "loss": 0.2964, "step": 14161 }, { "epoch": 2.05, "grad_norm": 8.459335327148438, "learning_rate": 4.860839427901093e-07, "loss": 0.3158, "step": 14162 }, { "epoch": 2.05, "grad_norm": 9.867496490478516, "learning_rate": 4.859481680239662e-07, "loss": 0.3423, "step": 14163 }, { "epoch": 2.06, "grad_norm": 7.089743614196777, "learning_rate": 4.858124061365565e-07, "loss": 0.3453, "step": 14164 }, { "epoch": 2.06, "grad_norm": 10.054208755493164, "learning_rate": 4.856766571312814e-07, "loss": 0.4333, "step": 14165 }, { "epoch": 2.06, "grad_norm": 8.064559936523438, "learning_rate": 4.855409210115425e-07, "loss": 0.3569, "step": 14166 }, { "epoch": 2.06, "grad_norm": 7.3183512687683105, "learning_rate": 4.85405197780739e-07, "loss": 0.302, "step": 14167 }, { "epoch": 2.06, "grad_norm": 10.74886703491211, "learning_rate": 4.85269487442273e-07, "loss": 0.4959, "step": 14168 }, { "epoch": 2.06, "grad_norm": 8.492502212524414, "learning_rate": 4.851337899995428e-07, "loss": 0.4099, "step": 14169 }, { "epoch": 2.06, "grad_norm": 8.322526931762695, "learning_rate": 4.849981054559497e-07, "loss": 0.362, "step": 14170 }, { "epoch": 2.06, "grad_norm": 8.640020370483398, "learning_rate": 4.848624338148915e-07, "loss": 0.3588, "step": 14171 }, { "epoch": 2.06, "grad_norm": 8.210846900939941, "learning_rate": 4.847267750797689e-07, "loss": 0.3747, "step": 14172 }, { "epoch": 2.06, "grad_norm": 8.542533874511719, "learning_rate": 4.845911292539792e-07, "loss": 0.348, "step": 14173 }, { "epoch": 2.06, "grad_norm": 7.650836944580078, "learning_rate": 4.84455496340921e-07, "loss": 0.3233, "step": 14174 }, { "epoch": 2.06, "grad_norm": 7.244509696960449, "learning_rate": 4.843198763439934e-07, "loss": 0.3393, "step": 14175 }, { "epoch": 2.06, "grad_norm": 9.241531372070312, "learning_rate": 4.841842692665926e-07, "loss": 0.3784, "step": 14176 }, { "epoch": 2.06, "grad_norm": 9.143563270568848, "learning_rate": 4.840486751121176e-07, "loss": 0.3827, "step": 14177 }, { "epoch": 2.06, "grad_norm": 7.6168951988220215, "learning_rate": 4.839130938839645e-07, "loss": 0.3581, "step": 14178 }, { "epoch": 2.06, "grad_norm": 9.15984058380127, "learning_rate": 4.837775255855301e-07, "loss": 0.4157, "step": 14179 }, { "epoch": 2.06, "grad_norm": 7.163436412811279, "learning_rate": 4.83641970220211e-07, "loss": 0.3562, "step": 14180 }, { "epoch": 2.06, "grad_norm": 7.957094192504883, "learning_rate": 4.835064277914035e-07, "loss": 0.3273, "step": 14181 }, { "epoch": 2.06, "grad_norm": 7.8046393394470215, "learning_rate": 4.833708983025031e-07, "loss": 0.3635, "step": 14182 }, { "epoch": 2.06, "grad_norm": 8.277288436889648, "learning_rate": 4.832353817569055e-07, "loss": 0.4447, "step": 14183 }, { "epoch": 2.06, "grad_norm": 8.271781921386719, "learning_rate": 4.830998781580057e-07, "loss": 0.3659, "step": 14184 }, { "epoch": 2.06, "grad_norm": 8.10306453704834, "learning_rate": 4.829643875091986e-07, "loss": 0.3568, "step": 14185 }, { "epoch": 2.06, "grad_norm": 7.794251918792725, "learning_rate": 4.828289098138787e-07, "loss": 0.4051, "step": 14186 }, { "epoch": 2.06, "grad_norm": 7.414971828460693, "learning_rate": 4.826934450754405e-07, "loss": 0.3543, "step": 14187 }, { "epoch": 2.06, "grad_norm": 6.926959991455078, "learning_rate": 4.825579932972766e-07, "loss": 0.3239, "step": 14188 }, { "epoch": 2.06, "grad_norm": 7.975642681121826, "learning_rate": 4.824225544827824e-07, "loss": 0.379, "step": 14189 }, { "epoch": 2.06, "grad_norm": 7.784983158111572, "learning_rate": 4.822871286353493e-07, "loss": 0.3528, "step": 14190 }, { "epoch": 2.06, "grad_norm": 7.231323719024658, "learning_rate": 4.821517157583718e-07, "loss": 0.3734, "step": 14191 }, { "epoch": 2.06, "grad_norm": 8.697402000427246, "learning_rate": 4.820163158552411e-07, "loss": 0.422, "step": 14192 }, { "epoch": 2.06, "grad_norm": 7.823329925537109, "learning_rate": 4.818809289293502e-07, "loss": 0.3369, "step": 14193 }, { "epoch": 2.06, "grad_norm": 7.664574146270752, "learning_rate": 4.817455549840906e-07, "loss": 0.3211, "step": 14194 }, { "epoch": 2.06, "grad_norm": 6.739430904388428, "learning_rate": 4.816101940228541e-07, "loss": 0.2969, "step": 14195 }, { "epoch": 2.06, "grad_norm": 8.385177612304688, "learning_rate": 4.814748460490319e-07, "loss": 0.3588, "step": 14196 }, { "epoch": 2.06, "grad_norm": 7.876156330108643, "learning_rate": 4.813395110660149e-07, "loss": 0.3539, "step": 14197 }, { "epoch": 2.06, "grad_norm": 6.814352512359619, "learning_rate": 4.812041890771937e-07, "loss": 0.3166, "step": 14198 }, { "epoch": 2.06, "grad_norm": 6.898463726043701, "learning_rate": 4.810688800859586e-07, "loss": 0.3316, "step": 14199 }, { "epoch": 2.06, "grad_norm": 7.070091724395752, "learning_rate": 4.809335840956995e-07, "loss": 0.3138, "step": 14200 }, { "epoch": 2.06, "grad_norm": 9.02212905883789, "learning_rate": 4.807983011098062e-07, "loss": 0.4668, "step": 14201 }, { "epoch": 2.06, "grad_norm": 8.063758850097656, "learning_rate": 4.806630311316677e-07, "loss": 0.3791, "step": 14202 }, { "epoch": 2.06, "grad_norm": 7.116847991943359, "learning_rate": 4.805277741646732e-07, "loss": 0.3389, "step": 14203 }, { "epoch": 2.06, "grad_norm": 8.883625030517578, "learning_rate": 4.803925302122112e-07, "loss": 0.3625, "step": 14204 }, { "epoch": 2.06, "grad_norm": 8.09019660949707, "learning_rate": 4.802572992776701e-07, "loss": 0.3813, "step": 14205 }, { "epoch": 2.06, "grad_norm": 8.603925704956055, "learning_rate": 4.801220813644379e-07, "loss": 0.3994, "step": 14206 }, { "epoch": 2.06, "grad_norm": 8.130364418029785, "learning_rate": 4.799868764759022e-07, "loss": 0.3615, "step": 14207 }, { "epoch": 2.06, "grad_norm": 7.737051010131836, "learning_rate": 4.798516846154508e-07, "loss": 0.3421, "step": 14208 }, { "epoch": 2.06, "grad_norm": 7.68867301940918, "learning_rate": 4.797165057864697e-07, "loss": 0.337, "step": 14209 }, { "epoch": 2.06, "grad_norm": 9.542705535888672, "learning_rate": 4.795813399923466e-07, "loss": 0.3686, "step": 14210 }, { "epoch": 2.06, "grad_norm": 8.516121864318848, "learning_rate": 4.794461872364668e-07, "loss": 0.3766, "step": 14211 }, { "epoch": 2.06, "grad_norm": 7.3035383224487305, "learning_rate": 4.793110475222178e-07, "loss": 0.3382, "step": 14212 }, { "epoch": 2.06, "grad_norm": 7.463489055633545, "learning_rate": 4.791759208529841e-07, "loss": 0.3031, "step": 14213 }, { "epoch": 2.06, "grad_norm": 7.236388683319092, "learning_rate": 4.790408072321513e-07, "loss": 0.3632, "step": 14214 }, { "epoch": 2.06, "grad_norm": 7.367934226989746, "learning_rate": 4.789057066631048e-07, "loss": 0.3546, "step": 14215 }, { "epoch": 2.06, "grad_norm": 8.026063919067383, "learning_rate": 4.78770619149229e-07, "loss": 0.3151, "step": 14216 }, { "epoch": 2.06, "grad_norm": 8.645862579345703, "learning_rate": 4.786355446939084e-07, "loss": 0.3245, "step": 14217 }, { "epoch": 2.06, "grad_norm": 8.576781272888184, "learning_rate": 4.785004833005273e-07, "loss": 0.3816, "step": 14218 }, { "epoch": 2.06, "grad_norm": 8.3772611618042, "learning_rate": 4.78365434972469e-07, "loss": 0.3119, "step": 14219 }, { "epoch": 2.06, "grad_norm": 7.561949253082275, "learning_rate": 4.782303997131173e-07, "loss": 0.3197, "step": 14220 }, { "epoch": 2.06, "grad_norm": 8.529504776000977, "learning_rate": 4.780953775258552e-07, "loss": 0.309, "step": 14221 }, { "epoch": 2.06, "grad_norm": 8.073588371276855, "learning_rate": 4.779603684140656e-07, "loss": 0.3918, "step": 14222 }, { "epoch": 2.06, "grad_norm": 8.019584655761719, "learning_rate": 4.778253723811302e-07, "loss": 0.3925, "step": 14223 }, { "epoch": 2.06, "grad_norm": 9.192255973815918, "learning_rate": 4.776903894304323e-07, "loss": 0.3869, "step": 14224 }, { "epoch": 2.06, "grad_norm": 6.699491024017334, "learning_rate": 4.775554195653522e-07, "loss": 0.3168, "step": 14225 }, { "epoch": 2.06, "grad_norm": 8.623550415039062, "learning_rate": 4.774204627892726e-07, "loss": 0.3737, "step": 14226 }, { "epoch": 2.06, "grad_norm": 9.442740440368652, "learning_rate": 4.772855191055747e-07, "loss": 0.3488, "step": 14227 }, { "epoch": 2.06, "grad_norm": 9.301331520080566, "learning_rate": 4.771505885176379e-07, "loss": 0.4037, "step": 14228 }, { "epoch": 2.06, "grad_norm": 7.182283878326416, "learning_rate": 4.770156710288445e-07, "loss": 0.3087, "step": 14229 }, { "epoch": 2.06, "grad_norm": 7.574580669403076, "learning_rate": 4.768807666425727e-07, "loss": 0.3359, "step": 14230 }, { "epoch": 2.06, "grad_norm": 8.37148380279541, "learning_rate": 4.7674587536220425e-07, "loss": 0.3814, "step": 14231 }, { "epoch": 2.07, "grad_norm": 6.893813610076904, "learning_rate": 4.766109971911169e-07, "loss": 0.3245, "step": 14232 }, { "epoch": 2.07, "grad_norm": 9.303677558898926, "learning_rate": 4.7647613213269125e-07, "loss": 0.4147, "step": 14233 }, { "epoch": 2.07, "grad_norm": 8.11386775970459, "learning_rate": 4.763412801903052e-07, "loss": 0.3369, "step": 14234 }, { "epoch": 2.07, "grad_norm": 9.213088035583496, "learning_rate": 4.7620644136733744e-07, "loss": 0.3993, "step": 14235 }, { "epoch": 2.07, "grad_norm": 7.779688358306885, "learning_rate": 4.760716156671664e-07, "loss": 0.3233, "step": 14236 }, { "epoch": 2.07, "grad_norm": 8.155416488647461, "learning_rate": 4.759368030931695e-07, "loss": 0.3838, "step": 14237 }, { "epoch": 2.07, "grad_norm": 8.198294639587402, "learning_rate": 4.7580200364872456e-07, "loss": 0.3577, "step": 14238 }, { "epoch": 2.07, "grad_norm": 8.153785705566406, "learning_rate": 4.756672173372087e-07, "loss": 0.3411, "step": 14239 }, { "epoch": 2.07, "grad_norm": 7.654360771179199, "learning_rate": 4.755324441619989e-07, "loss": 0.3519, "step": 14240 }, { "epoch": 2.07, "grad_norm": 7.737811088562012, "learning_rate": 4.753976841264714e-07, "loss": 0.3756, "step": 14241 }, { "epoch": 2.07, "grad_norm": 8.501666069030762, "learning_rate": 4.752629372340026e-07, "loss": 0.3471, "step": 14242 }, { "epoch": 2.07, "grad_norm": 9.656256675720215, "learning_rate": 4.751282034879686e-07, "loss": 0.4119, "step": 14243 }, { "epoch": 2.07, "grad_norm": 7.221543788909912, "learning_rate": 4.749934828917439e-07, "loss": 0.3384, "step": 14244 }, { "epoch": 2.07, "grad_norm": 7.725869655609131, "learning_rate": 4.748587754487051e-07, "loss": 0.3351, "step": 14245 }, { "epoch": 2.07, "grad_norm": 9.341224670410156, "learning_rate": 4.7472408116222585e-07, "loss": 0.331, "step": 14246 }, { "epoch": 2.07, "grad_norm": 7.5850934982299805, "learning_rate": 4.745894000356819e-07, "loss": 0.3565, "step": 14247 }, { "epoch": 2.07, "grad_norm": 10.078791618347168, "learning_rate": 4.7445473207244644e-07, "loss": 0.3566, "step": 14248 }, { "epoch": 2.07, "grad_norm": 8.257739067077637, "learning_rate": 4.7432007727589387e-07, "loss": 0.3536, "step": 14249 }, { "epoch": 2.07, "grad_norm": 7.827880382537842, "learning_rate": 4.741854356493975e-07, "loss": 0.3764, "step": 14250 }, { "epoch": 2.07, "grad_norm": 8.096892356872559, "learning_rate": 4.7405080719633075e-07, "loss": 0.3328, "step": 14251 }, { "epoch": 2.07, "grad_norm": 8.004596710205078, "learning_rate": 4.739161919200664e-07, "loss": 0.3292, "step": 14252 }, { "epoch": 2.07, "grad_norm": 9.529650688171387, "learning_rate": 4.7378158982397676e-07, "loss": 0.4054, "step": 14253 }, { "epoch": 2.07, "grad_norm": 7.509002208709717, "learning_rate": 4.7364700091143507e-07, "loss": 0.3514, "step": 14254 }, { "epoch": 2.07, "grad_norm": 8.985172271728516, "learning_rate": 4.7351242518581215e-07, "loss": 0.3368, "step": 14255 }, { "epoch": 2.07, "grad_norm": 7.963187217712402, "learning_rate": 4.7337786265047995e-07, "loss": 0.3013, "step": 14256 }, { "epoch": 2.07, "grad_norm": 9.035687446594238, "learning_rate": 4.7324331330880986e-07, "loss": 0.4015, "step": 14257 }, { "epoch": 2.07, "grad_norm": 9.893167495727539, "learning_rate": 4.731087771641725e-07, "loss": 0.3973, "step": 14258 }, { "epoch": 2.07, "grad_norm": 7.533170700073242, "learning_rate": 4.7297425421993873e-07, "loss": 0.2814, "step": 14259 }, { "epoch": 2.07, "grad_norm": 7.944460868835449, "learning_rate": 4.7283974447947863e-07, "loss": 0.3137, "step": 14260 }, { "epoch": 2.07, "grad_norm": 8.922662734985352, "learning_rate": 4.727052479461622e-07, "loss": 0.4069, "step": 14261 }, { "epoch": 2.07, "grad_norm": 7.320334434509277, "learning_rate": 4.725707646233591e-07, "loss": 0.2746, "step": 14262 }, { "epoch": 2.07, "grad_norm": 8.16474723815918, "learning_rate": 4.7243629451443844e-07, "loss": 0.3469, "step": 14263 }, { "epoch": 2.07, "grad_norm": 8.418008804321289, "learning_rate": 4.7230183762276955e-07, "loss": 0.3338, "step": 14264 }, { "epoch": 2.07, "grad_norm": 7.540589332580566, "learning_rate": 4.7216739395172e-07, "loss": 0.3814, "step": 14265 }, { "epoch": 2.07, "grad_norm": 6.937183856964111, "learning_rate": 4.720329635046595e-07, "loss": 0.2785, "step": 14266 }, { "epoch": 2.07, "grad_norm": 8.547776222229004, "learning_rate": 4.718985462849545e-07, "loss": 0.3893, "step": 14267 }, { "epoch": 2.07, "grad_norm": 7.73179292678833, "learning_rate": 4.7176414229597404e-07, "loss": 0.3604, "step": 14268 }, { "epoch": 2.07, "grad_norm": 7.3150177001953125, "learning_rate": 4.7162975154108443e-07, "loss": 0.3573, "step": 14269 }, { "epoch": 2.07, "grad_norm": 6.88646936416626, "learning_rate": 4.714953740236528e-07, "loss": 0.3289, "step": 14270 }, { "epoch": 2.07, "grad_norm": 7.7365899085998535, "learning_rate": 4.7136100974704596e-07, "loss": 0.3223, "step": 14271 }, { "epoch": 2.07, "grad_norm": 7.589440822601318, "learning_rate": 4.7122665871462996e-07, "loss": 0.3769, "step": 14272 }, { "epoch": 2.07, "grad_norm": 7.4955949783325195, "learning_rate": 4.7109232092977093e-07, "loss": 0.3734, "step": 14273 }, { "epoch": 2.07, "grad_norm": 7.360832691192627, "learning_rate": 4.7095799639583434e-07, "loss": 0.3396, "step": 14274 }, { "epoch": 2.07, "grad_norm": 7.664430141448975, "learning_rate": 4.7082368511618555e-07, "loss": 0.3746, "step": 14275 }, { "epoch": 2.07, "grad_norm": 7.7389140129089355, "learning_rate": 4.706893870941896e-07, "loss": 0.3518, "step": 14276 }, { "epoch": 2.07, "grad_norm": 8.918828964233398, "learning_rate": 4.705551023332109e-07, "loss": 0.361, "step": 14277 }, { "epoch": 2.07, "grad_norm": 7.2999444007873535, "learning_rate": 4.704208308366139e-07, "loss": 0.3619, "step": 14278 }, { "epoch": 2.07, "grad_norm": 7.890316486358643, "learning_rate": 4.702865726077625e-07, "loss": 0.3703, "step": 14279 }, { "epoch": 2.07, "grad_norm": 8.61853313446045, "learning_rate": 4.701523276500203e-07, "loss": 0.4066, "step": 14280 }, { "epoch": 2.07, "grad_norm": 8.248120307922363, "learning_rate": 4.700180959667508e-07, "loss": 0.402, "step": 14281 }, { "epoch": 2.07, "grad_norm": 8.125862121582031, "learning_rate": 4.6988387756131667e-07, "loss": 0.3567, "step": 14282 }, { "epoch": 2.07, "grad_norm": 8.680303573608398, "learning_rate": 4.6974967243708087e-07, "loss": 0.4153, "step": 14283 }, { "epoch": 2.07, "grad_norm": 7.216590881347656, "learning_rate": 4.696154805974049e-07, "loss": 0.3347, "step": 14284 }, { "epoch": 2.07, "grad_norm": 7.233333587646484, "learning_rate": 4.6948130204565196e-07, "loss": 0.2911, "step": 14285 }, { "epoch": 2.07, "grad_norm": 8.587321281433105, "learning_rate": 4.693471367851822e-07, "loss": 0.3967, "step": 14286 }, { "epoch": 2.07, "grad_norm": 8.791250228881836, "learning_rate": 4.6921298481935856e-07, "loss": 0.381, "step": 14287 }, { "epoch": 2.07, "grad_norm": 9.695539474487305, "learning_rate": 4.6907884615154024e-07, "loss": 0.3195, "step": 14288 }, { "epoch": 2.07, "grad_norm": 7.871456623077393, "learning_rate": 4.689447207850896e-07, "loss": 0.3721, "step": 14289 }, { "epoch": 2.07, "grad_norm": 7.863250732421875, "learning_rate": 4.688106087233659e-07, "loss": 0.3491, "step": 14290 }, { "epoch": 2.07, "grad_norm": 9.136748313903809, "learning_rate": 4.686765099697291e-07, "loss": 0.2738, "step": 14291 }, { "epoch": 2.07, "grad_norm": 10.142611503601074, "learning_rate": 4.685424245275391e-07, "loss": 0.4194, "step": 14292 }, { "epoch": 2.07, "grad_norm": 7.18649959564209, "learning_rate": 4.684083524001551e-07, "loss": 0.3315, "step": 14293 }, { "epoch": 2.07, "grad_norm": 7.254025459289551, "learning_rate": 4.682742935909361e-07, "loss": 0.3225, "step": 14294 }, { "epoch": 2.07, "grad_norm": 8.098136901855469, "learning_rate": 4.681402481032407e-07, "loss": 0.3745, "step": 14295 }, { "epoch": 2.07, "grad_norm": 7.8740339279174805, "learning_rate": 4.680062159404272e-07, "loss": 0.3712, "step": 14296 }, { "epoch": 2.07, "grad_norm": 8.279154777526855, "learning_rate": 4.678721971058536e-07, "loss": 0.3759, "step": 14297 }, { "epoch": 2.07, "grad_norm": 10.49545669555664, "learning_rate": 4.6773819160287743e-07, "loss": 0.4113, "step": 14298 }, { "epoch": 2.07, "grad_norm": 7.734718322753906, "learning_rate": 4.676041994348564e-07, "loss": 0.3443, "step": 14299 }, { "epoch": 2.07, "grad_norm": 7.445781707763672, "learning_rate": 4.674702206051464e-07, "loss": 0.3087, "step": 14300 }, { "epoch": 2.08, "grad_norm": 6.971195697784424, "learning_rate": 4.673362551171055e-07, "loss": 0.3681, "step": 14301 }, { "epoch": 2.08, "grad_norm": 8.419143676757812, "learning_rate": 4.6720230297408856e-07, "loss": 0.3679, "step": 14302 }, { "epoch": 2.08, "grad_norm": 6.9566650390625, "learning_rate": 4.6706836417945283e-07, "loss": 0.3223, "step": 14303 }, { "epoch": 2.08, "grad_norm": 9.806829452514648, "learning_rate": 4.6693443873655303e-07, "loss": 0.4715, "step": 14304 }, { "epoch": 2.08, "grad_norm": 8.620160102844238, "learning_rate": 4.6680052664874436e-07, "loss": 0.3592, "step": 14305 }, { "epoch": 2.08, "grad_norm": 8.239286422729492, "learning_rate": 4.666666279193828e-07, "loss": 0.36, "step": 14306 }, { "epoch": 2.08, "grad_norm": 7.139820098876953, "learning_rate": 4.665327425518216e-07, "loss": 0.2962, "step": 14307 }, { "epoch": 2.08, "grad_norm": 8.335116386413574, "learning_rate": 4.663988705494165e-07, "loss": 0.403, "step": 14308 }, { "epoch": 2.08, "grad_norm": 7.174566745758057, "learning_rate": 4.6626501191552023e-07, "loss": 0.3802, "step": 14309 }, { "epoch": 2.08, "grad_norm": 8.443359375, "learning_rate": 4.66131166653487e-07, "loss": 0.416, "step": 14310 }, { "epoch": 2.08, "grad_norm": 8.548789978027344, "learning_rate": 4.6599733476666994e-07, "loss": 0.3886, "step": 14311 }, { "epoch": 2.08, "grad_norm": 8.220523834228516, "learning_rate": 4.6586351625842177e-07, "loss": 0.4097, "step": 14312 }, { "epoch": 2.08, "grad_norm": 7.7762370109558105, "learning_rate": 4.6572971113209557e-07, "loss": 0.3285, "step": 14313 }, { "epoch": 2.08, "grad_norm": 7.107708930969238, "learning_rate": 4.6559591939104325e-07, "loss": 0.3247, "step": 14314 }, { "epoch": 2.08, "grad_norm": 8.153428077697754, "learning_rate": 4.654621410386169e-07, "loss": 0.3466, "step": 14315 }, { "epoch": 2.08, "grad_norm": 7.971155643463135, "learning_rate": 4.65328376078168e-07, "loss": 0.3687, "step": 14316 }, { "epoch": 2.08, "grad_norm": 8.288392066955566, "learning_rate": 4.651946245130479e-07, "loss": 0.3636, "step": 14317 }, { "epoch": 2.08, "grad_norm": 7.983789920806885, "learning_rate": 4.65060886346608e-07, "loss": 0.4039, "step": 14318 }, { "epoch": 2.08, "grad_norm": 8.835461616516113, "learning_rate": 4.6492716158219755e-07, "loss": 0.3969, "step": 14319 }, { "epoch": 2.08, "grad_norm": 8.832202911376953, "learning_rate": 4.647934502231684e-07, "loss": 0.3592, "step": 14320 }, { "epoch": 2.08, "grad_norm": 7.398427486419678, "learning_rate": 4.6465975227286916e-07, "loss": 0.3353, "step": 14321 }, { "epoch": 2.08, "grad_norm": 7.970818996429443, "learning_rate": 4.6452606773465054e-07, "loss": 0.3684, "step": 14322 }, { "epoch": 2.08, "grad_norm": 7.537613868713379, "learning_rate": 4.643923966118606e-07, "loss": 0.3353, "step": 14323 }, { "epoch": 2.08, "grad_norm": 7.276415824890137, "learning_rate": 4.642587389078496e-07, "loss": 0.3241, "step": 14324 }, { "epoch": 2.08, "grad_norm": 7.912692070007324, "learning_rate": 4.6412509462596503e-07, "loss": 0.3813, "step": 14325 }, { "epoch": 2.08, "grad_norm": 8.30093765258789, "learning_rate": 4.6399146376955545e-07, "loss": 0.3833, "step": 14326 }, { "epoch": 2.08, "grad_norm": 10.460978507995605, "learning_rate": 4.638578463419689e-07, "loss": 0.4429, "step": 14327 }, { "epoch": 2.08, "grad_norm": 7.990890026092529, "learning_rate": 4.6372424234655284e-07, "loss": 0.3574, "step": 14328 }, { "epoch": 2.08, "grad_norm": 7.073441982269287, "learning_rate": 4.6359065178665455e-07, "loss": 0.2811, "step": 14329 }, { "epoch": 2.08, "grad_norm": 8.998788833618164, "learning_rate": 4.634570746656209e-07, "loss": 0.4226, "step": 14330 }, { "epoch": 2.08, "grad_norm": 8.497725486755371, "learning_rate": 4.6332351098679854e-07, "loss": 0.3584, "step": 14331 }, { "epoch": 2.08, "grad_norm": 7.39438533782959, "learning_rate": 4.6318996075353345e-07, "loss": 0.3551, "step": 14332 }, { "epoch": 2.08, "grad_norm": 7.401301383972168, "learning_rate": 4.630564239691718e-07, "loss": 0.3336, "step": 14333 }, { "epoch": 2.08, "grad_norm": 8.81351375579834, "learning_rate": 4.62922900637059e-07, "loss": 0.3279, "step": 14334 }, { "epoch": 2.08, "grad_norm": 7.842903137207031, "learning_rate": 4.627893907605401e-07, "loss": 0.3212, "step": 14335 }, { "epoch": 2.08, "grad_norm": 7.6268181800842285, "learning_rate": 4.626558943429603e-07, "loss": 0.3864, "step": 14336 }, { "epoch": 2.08, "grad_norm": 7.595993518829346, "learning_rate": 4.625224113876639e-07, "loss": 0.3454, "step": 14337 }, { "epoch": 2.08, "grad_norm": 9.189400672912598, "learning_rate": 4.623889418979953e-07, "loss": 0.3994, "step": 14338 }, { "epoch": 2.08, "grad_norm": 7.302354335784912, "learning_rate": 4.6225548587729843e-07, "loss": 0.2901, "step": 14339 }, { "epoch": 2.08, "grad_norm": 7.770635604858398, "learning_rate": 4.6212204332891603e-07, "loss": 0.3255, "step": 14340 }, { "epoch": 2.08, "grad_norm": 8.360011100769043, "learning_rate": 4.619886142561925e-07, "loss": 0.4, "step": 14341 }, { "epoch": 2.08, "grad_norm": 8.25833511352539, "learning_rate": 4.6185519866246937e-07, "loss": 0.3698, "step": 14342 }, { "epoch": 2.08, "grad_norm": 8.291933059692383, "learning_rate": 4.617217965510906e-07, "loss": 0.3375, "step": 14343 }, { "epoch": 2.08, "grad_norm": 9.748708724975586, "learning_rate": 4.6158840792539677e-07, "loss": 0.382, "step": 14344 }, { "epoch": 2.08, "grad_norm": 7.8663740158081055, "learning_rate": 4.6145503278873144e-07, "loss": 0.3429, "step": 14345 }, { "epoch": 2.08, "grad_norm": 8.920720100402832, "learning_rate": 4.6132167114443477e-07, "loss": 0.3557, "step": 14346 }, { "epoch": 2.08, "grad_norm": 8.642266273498535, "learning_rate": 4.611883229958483e-07, "loss": 0.3601, "step": 14347 }, { "epoch": 2.08, "grad_norm": 8.405951499938965, "learning_rate": 4.610549883463131e-07, "loss": 0.3289, "step": 14348 }, { "epoch": 2.08, "grad_norm": 7.884922027587891, "learning_rate": 4.6092166719916935e-07, "loss": 0.3613, "step": 14349 }, { "epoch": 2.08, "grad_norm": 8.531624794006348, "learning_rate": 4.607883595577573e-07, "loss": 0.3774, "step": 14350 }, { "epoch": 2.08, "grad_norm": 8.746574401855469, "learning_rate": 4.606550654254168e-07, "loss": 0.3944, "step": 14351 }, { "epoch": 2.08, "grad_norm": 10.42162036895752, "learning_rate": 4.605217848054873e-07, "loss": 0.3949, "step": 14352 }, { "epoch": 2.08, "grad_norm": 8.058723449707031, "learning_rate": 4.6038851770130795e-07, "loss": 0.3696, "step": 14353 }, { "epoch": 2.08, "grad_norm": 7.707417964935303, "learning_rate": 4.602552641162175e-07, "loss": 0.3184, "step": 14354 }, { "epoch": 2.08, "grad_norm": 8.419696807861328, "learning_rate": 4.601220240535547e-07, "loss": 0.2722, "step": 14355 }, { "epoch": 2.08, "grad_norm": 7.4971699714660645, "learning_rate": 4.5998879751665675e-07, "loss": 0.332, "step": 14356 }, { "epoch": 2.08, "grad_norm": 9.543100357055664, "learning_rate": 4.598555845088624e-07, "loss": 0.3984, "step": 14357 }, { "epoch": 2.08, "grad_norm": 7.844549179077148, "learning_rate": 4.597223850335087e-07, "loss": 0.3111, "step": 14358 }, { "epoch": 2.08, "grad_norm": 8.627326965332031, "learning_rate": 4.595891990939328e-07, "loss": 0.3723, "step": 14359 }, { "epoch": 2.08, "grad_norm": 9.74272346496582, "learning_rate": 4.5945602669347185e-07, "loss": 0.4214, "step": 14360 }, { "epoch": 2.08, "grad_norm": 7.4864182472229, "learning_rate": 4.5932286783546114e-07, "loss": 0.3433, "step": 14361 }, { "epoch": 2.08, "grad_norm": 8.221168518066406, "learning_rate": 4.5918972252323805e-07, "loss": 0.3961, "step": 14362 }, { "epoch": 2.08, "grad_norm": 8.238936424255371, "learning_rate": 4.5905659076013716e-07, "loss": 0.354, "step": 14363 }, { "epoch": 2.08, "grad_norm": 8.5907564163208, "learning_rate": 4.5892347254949513e-07, "loss": 0.3854, "step": 14364 }, { "epoch": 2.08, "grad_norm": 7.758678436279297, "learning_rate": 4.58790367894646e-07, "loss": 0.3457, "step": 14365 }, { "epoch": 2.08, "grad_norm": 7.6412272453308105, "learning_rate": 4.5865727679892473e-07, "loss": 0.3243, "step": 14366 }, { "epoch": 2.08, "grad_norm": 8.074623107910156, "learning_rate": 4.585241992656659e-07, "loss": 0.3225, "step": 14367 }, { "epoch": 2.08, "grad_norm": 6.904529094696045, "learning_rate": 4.583911352982034e-07, "loss": 0.3247, "step": 14368 }, { "epoch": 2.08, "grad_norm": 8.201900482177734, "learning_rate": 4.5825808489987103e-07, "loss": 0.3649, "step": 14369 }, { "epoch": 2.09, "grad_norm": 7.886480331420898, "learning_rate": 4.581250480740021e-07, "loss": 0.3545, "step": 14370 }, { "epoch": 2.09, "grad_norm": 9.248950958251953, "learning_rate": 4.579920248239296e-07, "loss": 0.3263, "step": 14371 }, { "epoch": 2.09, "grad_norm": 8.491926193237305, "learning_rate": 4.578590151529863e-07, "loss": 0.334, "step": 14372 }, { "epoch": 2.09, "grad_norm": 7.183786869049072, "learning_rate": 4.577260190645045e-07, "loss": 0.3278, "step": 14373 }, { "epoch": 2.09, "grad_norm": 9.143638610839844, "learning_rate": 4.575930365618165e-07, "loss": 0.4379, "step": 14374 }, { "epoch": 2.09, "grad_norm": 7.490744113922119, "learning_rate": 4.5746006764825284e-07, "loss": 0.3467, "step": 14375 }, { "epoch": 2.09, "grad_norm": 8.232168197631836, "learning_rate": 4.573271123271465e-07, "loss": 0.3679, "step": 14376 }, { "epoch": 2.09, "grad_norm": 7.769646644592285, "learning_rate": 4.571941706018269e-07, "loss": 0.3329, "step": 14377 }, { "epoch": 2.09, "grad_norm": 7.722303867340088, "learning_rate": 4.5706124247562604e-07, "loss": 0.3315, "step": 14378 }, { "epoch": 2.09, "grad_norm": 10.409274101257324, "learning_rate": 4.569283279518729e-07, "loss": 0.3764, "step": 14379 }, { "epoch": 2.09, "grad_norm": 8.438440322875977, "learning_rate": 4.5679542703389873e-07, "loss": 0.407, "step": 14380 }, { "epoch": 2.09, "grad_norm": 7.687004566192627, "learning_rate": 4.566625397250322e-07, "loss": 0.3508, "step": 14381 }, { "epoch": 2.09, "grad_norm": 8.112584114074707, "learning_rate": 4.56529666028603e-07, "loss": 0.3872, "step": 14382 }, { "epoch": 2.09, "grad_norm": 8.33722972869873, "learning_rate": 4.563968059479398e-07, "loss": 0.3752, "step": 14383 }, { "epoch": 2.09, "grad_norm": 7.889167785644531, "learning_rate": 4.562639594863712e-07, "loss": 0.3488, "step": 14384 }, { "epoch": 2.09, "grad_norm": 7.721938133239746, "learning_rate": 4.561311266472262e-07, "loss": 0.3654, "step": 14385 }, { "epoch": 2.09, "grad_norm": 8.461343765258789, "learning_rate": 4.559983074338318e-07, "loss": 0.359, "step": 14386 }, { "epoch": 2.09, "grad_norm": 8.312132835388184, "learning_rate": 4.5586550184951587e-07, "loss": 0.3429, "step": 14387 }, { "epoch": 2.09, "grad_norm": 8.285473823547363, "learning_rate": 4.5573270989760584e-07, "loss": 0.376, "step": 14388 }, { "epoch": 2.09, "grad_norm": 7.339877128601074, "learning_rate": 4.555999315814282e-07, "loss": 0.3349, "step": 14389 }, { "epoch": 2.09, "grad_norm": 8.512722969055176, "learning_rate": 4.554671669043099e-07, "loss": 0.3884, "step": 14390 }, { "epoch": 2.09, "grad_norm": 9.265531539916992, "learning_rate": 4.5533441586957675e-07, "loss": 0.3636, "step": 14391 }, { "epoch": 2.09, "grad_norm": 7.827385425567627, "learning_rate": 4.5520167848055504e-07, "loss": 0.3272, "step": 14392 }, { "epoch": 2.09, "grad_norm": 8.101859092712402, "learning_rate": 4.5506895474056993e-07, "loss": 0.323, "step": 14393 }, { "epoch": 2.09, "grad_norm": 7.117743015289307, "learning_rate": 4.549362446529468e-07, "loss": 0.3185, "step": 14394 }, { "epoch": 2.09, "grad_norm": 9.791340827941895, "learning_rate": 4.548035482210107e-07, "loss": 0.3793, "step": 14395 }, { "epoch": 2.09, "grad_norm": 8.808521270751953, "learning_rate": 4.546708654480852e-07, "loss": 0.355, "step": 14396 }, { "epoch": 2.09, "grad_norm": 8.306717872619629, "learning_rate": 4.545381963374958e-07, "loss": 0.3425, "step": 14397 }, { "epoch": 2.09, "grad_norm": 8.806166648864746, "learning_rate": 4.544055408925649e-07, "loss": 0.3547, "step": 14398 }, { "epoch": 2.09, "grad_norm": 8.506118774414062, "learning_rate": 4.5427289911661736e-07, "loss": 0.3413, "step": 14399 }, { "epoch": 2.09, "grad_norm": 7.863979816436768, "learning_rate": 4.541402710129749e-07, "loss": 0.3632, "step": 14400 }, { "epoch": 2.09, "grad_norm": 7.665111064910889, "learning_rate": 4.5400765658496185e-07, "loss": 0.3363, "step": 14401 }, { "epoch": 2.09, "grad_norm": 8.346960067749023, "learning_rate": 4.538750558358994e-07, "loss": 0.3989, "step": 14402 }, { "epoch": 2.09, "grad_norm": 8.729805946350098, "learning_rate": 4.5374246876911004e-07, "loss": 0.3409, "step": 14403 }, { "epoch": 2.09, "grad_norm": 8.737759590148926, "learning_rate": 4.536098953879156e-07, "loss": 0.3148, "step": 14404 }, { "epoch": 2.09, "grad_norm": 8.1571626663208, "learning_rate": 4.534773356956374e-07, "loss": 0.3752, "step": 14405 }, { "epoch": 2.09, "grad_norm": 9.268269538879395, "learning_rate": 4.5334478969559655e-07, "loss": 0.402, "step": 14406 }, { "epoch": 2.09, "grad_norm": 9.17883014678955, "learning_rate": 4.532122573911137e-07, "loss": 0.4009, "step": 14407 }, { "epoch": 2.09, "grad_norm": 8.214598655700684, "learning_rate": 4.5307973878550943e-07, "loss": 0.4157, "step": 14408 }, { "epoch": 2.09, "grad_norm": 7.827486515045166, "learning_rate": 4.5294723388210356e-07, "loss": 0.3732, "step": 14409 }, { "epoch": 2.09, "grad_norm": 6.831881999969482, "learning_rate": 4.5281474268421594e-07, "loss": 0.3097, "step": 14410 }, { "epoch": 2.09, "grad_norm": 10.239036560058594, "learning_rate": 4.526822651951658e-07, "loss": 0.3973, "step": 14411 }, { "epoch": 2.09, "grad_norm": 8.03327465057373, "learning_rate": 4.5254980141827226e-07, "loss": 0.3729, "step": 14412 }, { "epoch": 2.09, "grad_norm": 7.390741348266602, "learning_rate": 4.5241735135685387e-07, "loss": 0.3319, "step": 14413 }, { "epoch": 2.09, "grad_norm": 7.6520161628723145, "learning_rate": 4.5228491501422916e-07, "loss": 0.2873, "step": 14414 }, { "epoch": 2.09, "grad_norm": 7.6403937339782715, "learning_rate": 4.5215249239371576e-07, "loss": 0.3503, "step": 14415 }, { "epoch": 2.09, "grad_norm": 8.454041481018066, "learning_rate": 4.520200834986321e-07, "loss": 0.3884, "step": 14416 }, { "epoch": 2.09, "grad_norm": 8.508980751037598, "learning_rate": 4.5188768833229397e-07, "loss": 0.3666, "step": 14417 }, { "epoch": 2.09, "grad_norm": 8.111186981201172, "learning_rate": 4.5175530689801997e-07, "loss": 0.3814, "step": 14418 }, { "epoch": 2.09, "grad_norm": 8.593221664428711, "learning_rate": 4.516229391991252e-07, "loss": 0.3764, "step": 14419 }, { "epoch": 2.09, "grad_norm": 8.82112979888916, "learning_rate": 4.514905852389275e-07, "loss": 0.3206, "step": 14420 }, { "epoch": 2.09, "grad_norm": 9.275607109069824, "learning_rate": 4.513582450207415e-07, "loss": 0.4118, "step": 14421 }, { "epoch": 2.09, "grad_norm": 9.426835060119629, "learning_rate": 4.5122591854788326e-07, "loss": 0.3881, "step": 14422 }, { "epoch": 2.09, "grad_norm": 8.920869827270508, "learning_rate": 4.5109360582366797e-07, "loss": 0.3674, "step": 14423 }, { "epoch": 2.09, "grad_norm": 8.282608985900879, "learning_rate": 4.5096130685141055e-07, "loss": 0.3806, "step": 14424 }, { "epoch": 2.09, "grad_norm": 8.473747253417969, "learning_rate": 4.5082902163442536e-07, "loss": 0.3825, "step": 14425 }, { "epoch": 2.09, "grad_norm": 8.14275074005127, "learning_rate": 4.5069675017602684e-07, "loss": 0.3402, "step": 14426 }, { "epoch": 2.09, "grad_norm": 7.482920169830322, "learning_rate": 4.5056449247952867e-07, "loss": 0.3359, "step": 14427 }, { "epoch": 2.09, "grad_norm": 7.290525913238525, "learning_rate": 4.5043224854824445e-07, "loss": 0.3888, "step": 14428 }, { "epoch": 2.09, "grad_norm": 8.044242858886719, "learning_rate": 4.5030001838548725e-07, "loss": 0.3717, "step": 14429 }, { "epoch": 2.09, "grad_norm": 8.236032485961914, "learning_rate": 4.501678019945703e-07, "loss": 0.347, "step": 14430 }, { "epoch": 2.09, "grad_norm": 8.272818565368652, "learning_rate": 4.50035599378805e-07, "loss": 0.3282, "step": 14431 }, { "epoch": 2.09, "grad_norm": 7.295293807983398, "learning_rate": 4.4990341054150483e-07, "loss": 0.3194, "step": 14432 }, { "epoch": 2.09, "grad_norm": 8.282279014587402, "learning_rate": 4.4977123548598003e-07, "loss": 0.379, "step": 14433 }, { "epoch": 2.09, "grad_norm": 9.09489917755127, "learning_rate": 4.496390742155438e-07, "loss": 0.3342, "step": 14434 }, { "epoch": 2.09, "grad_norm": 7.365009307861328, "learning_rate": 4.495069267335057e-07, "loss": 0.285, "step": 14435 }, { "epoch": 2.09, "grad_norm": 7.000892162322998, "learning_rate": 4.4937479304317727e-07, "loss": 0.3259, "step": 14436 }, { "epoch": 2.09, "grad_norm": 8.334360122680664, "learning_rate": 4.492426731478691e-07, "loss": 0.3982, "step": 14437 }, { "epoch": 2.09, "grad_norm": 7.807607173919678, "learning_rate": 4.4911056705089024e-07, "loss": 0.3205, "step": 14438 }, { "epoch": 2.1, "grad_norm": 7.692538261413574, "learning_rate": 4.489784747555516e-07, "loss": 0.3182, "step": 14439 }, { "epoch": 2.1, "grad_norm": 8.7381591796875, "learning_rate": 4.488463962651614e-07, "loss": 0.3651, "step": 14440 }, { "epoch": 2.1, "grad_norm": 8.262638092041016, "learning_rate": 4.4871433158302986e-07, "loss": 0.3436, "step": 14441 }, { "epoch": 2.1, "grad_norm": 7.7768635749816895, "learning_rate": 4.4858228071246463e-07, "loss": 0.3455, "step": 14442 }, { "epoch": 2.1, "grad_norm": 8.082985877990723, "learning_rate": 4.484502436567744e-07, "loss": 0.3657, "step": 14443 }, { "epoch": 2.1, "grad_norm": 7.140829563140869, "learning_rate": 4.4831822041926716e-07, "loss": 0.3452, "step": 14444 }, { "epoch": 2.1, "grad_norm": 8.513277053833008, "learning_rate": 4.481862110032505e-07, "loss": 0.3218, "step": 14445 }, { "epoch": 2.1, "grad_norm": 8.603102684020996, "learning_rate": 4.480542154120317e-07, "loss": 0.3379, "step": 14446 }, { "epoch": 2.1, "grad_norm": 9.638102531433105, "learning_rate": 4.4792223364891777e-07, "loss": 0.3788, "step": 14447 }, { "epoch": 2.1, "grad_norm": 8.265036582946777, "learning_rate": 4.477902657172152e-07, "loss": 0.3484, "step": 14448 }, { "epoch": 2.1, "grad_norm": 6.908688545227051, "learning_rate": 4.476583116202303e-07, "loss": 0.3299, "step": 14449 }, { "epoch": 2.1, "grad_norm": 7.176362037658691, "learning_rate": 4.475263713612689e-07, "loss": 0.3364, "step": 14450 }, { "epoch": 2.1, "grad_norm": 7.735678672790527, "learning_rate": 4.4739444494363696e-07, "loss": 0.397, "step": 14451 }, { "epoch": 2.1, "grad_norm": 8.522265434265137, "learning_rate": 4.472625323706387e-07, "loss": 0.3693, "step": 14452 }, { "epoch": 2.1, "grad_norm": 8.517379760742188, "learning_rate": 4.471306336455802e-07, "loss": 0.3864, "step": 14453 }, { "epoch": 2.1, "grad_norm": 7.615737438201904, "learning_rate": 4.469987487717647e-07, "loss": 0.3387, "step": 14454 }, { "epoch": 2.1, "grad_norm": 8.245954513549805, "learning_rate": 4.4686687775249764e-07, "loss": 0.3509, "step": 14455 }, { "epoch": 2.1, "grad_norm": 8.23978328704834, "learning_rate": 4.467350205910819e-07, "loss": 0.3667, "step": 14456 }, { "epoch": 2.1, "grad_norm": 8.728004455566406, "learning_rate": 4.466031772908213e-07, "loss": 0.3535, "step": 14457 }, { "epoch": 2.1, "grad_norm": 7.49344539642334, "learning_rate": 4.4647134785501884e-07, "loss": 0.2884, "step": 14458 }, { "epoch": 2.1, "grad_norm": 8.20897388458252, "learning_rate": 4.463395322869774e-07, "loss": 0.3292, "step": 14459 }, { "epoch": 2.1, "grad_norm": 9.007291793823242, "learning_rate": 4.4620773058999936e-07, "loss": 0.3276, "step": 14460 }, { "epoch": 2.1, "grad_norm": 8.386384963989258, "learning_rate": 4.460759427673866e-07, "loss": 0.3597, "step": 14461 }, { "epoch": 2.1, "grad_norm": 8.397611618041992, "learning_rate": 4.459441688224416e-07, "loss": 0.3643, "step": 14462 }, { "epoch": 2.1, "grad_norm": 7.952788829803467, "learning_rate": 4.4581240875846495e-07, "loss": 0.3845, "step": 14463 }, { "epoch": 2.1, "grad_norm": 8.203227043151855, "learning_rate": 4.456806625787579e-07, "loss": 0.3435, "step": 14464 }, { "epoch": 2.1, "grad_norm": 8.313353538513184, "learning_rate": 4.455489302866211e-07, "loss": 0.3591, "step": 14465 }, { "epoch": 2.1, "grad_norm": 8.831948280334473, "learning_rate": 4.4541721188535507e-07, "loss": 0.3823, "step": 14466 }, { "epoch": 2.1, "grad_norm": 7.416340351104736, "learning_rate": 4.4528550737825965e-07, "loss": 0.3579, "step": 14467 }, { "epoch": 2.1, "grad_norm": 9.004414558410645, "learning_rate": 4.451538167686345e-07, "loss": 0.4004, "step": 14468 }, { "epoch": 2.1, "grad_norm": 7.501701831817627, "learning_rate": 4.45022140059779e-07, "loss": 0.3134, "step": 14469 }, { "epoch": 2.1, "grad_norm": 9.269762992858887, "learning_rate": 4.4489047725499206e-07, "loss": 0.3482, "step": 14470 }, { "epoch": 2.1, "grad_norm": 8.352869987487793, "learning_rate": 4.4475882835757216e-07, "loss": 0.3608, "step": 14471 }, { "epoch": 2.1, "grad_norm": 8.985421180725098, "learning_rate": 4.446271933708181e-07, "loss": 0.3534, "step": 14472 }, { "epoch": 2.1, "grad_norm": 8.179498672485352, "learning_rate": 4.444955722980265e-07, "loss": 0.385, "step": 14473 }, { "epoch": 2.1, "grad_norm": 8.572490692138672, "learning_rate": 4.4436396514249654e-07, "loss": 0.3298, "step": 14474 }, { "epoch": 2.1, "grad_norm": 8.20016098022461, "learning_rate": 4.4423237190752383e-07, "loss": 0.3396, "step": 14475 }, { "epoch": 2.1, "grad_norm": 8.451784133911133, "learning_rate": 4.441007925964068e-07, "loss": 0.4016, "step": 14476 }, { "epoch": 2.1, "grad_norm": 7.491681098937988, "learning_rate": 4.439692272124408e-07, "loss": 0.3459, "step": 14477 }, { "epoch": 2.1, "grad_norm": 7.900409698486328, "learning_rate": 4.4383767575892227e-07, "loss": 0.362, "step": 14478 }, { "epoch": 2.1, "grad_norm": 7.9715189933776855, "learning_rate": 4.4370613823914727e-07, "loss": 0.3761, "step": 14479 }, { "epoch": 2.1, "grad_norm": 9.400572776794434, "learning_rate": 4.43574614656411e-07, "loss": 0.4225, "step": 14480 }, { "epoch": 2.1, "grad_norm": 8.799488067626953, "learning_rate": 4.434431050140086e-07, "loss": 0.4053, "step": 14481 }, { "epoch": 2.1, "grad_norm": 7.596836090087891, "learning_rate": 4.4331160931523494e-07, "loss": 0.3147, "step": 14482 }, { "epoch": 2.1, "grad_norm": 8.152222633361816, "learning_rate": 4.431801275633844e-07, "loss": 0.3441, "step": 14483 }, { "epoch": 2.1, "grad_norm": 7.2749834060668945, "learning_rate": 4.430486597617511e-07, "loss": 0.326, "step": 14484 }, { "epoch": 2.1, "grad_norm": 8.01301097869873, "learning_rate": 4.4291720591362855e-07, "loss": 0.3256, "step": 14485 }, { "epoch": 2.1, "grad_norm": 7.802822589874268, "learning_rate": 4.4278576602231064e-07, "loss": 0.3751, "step": 14486 }, { "epoch": 2.1, "grad_norm": 7.464692115783691, "learning_rate": 4.4265434009108926e-07, "loss": 0.362, "step": 14487 }, { "epoch": 2.1, "grad_norm": 7.55225133895874, "learning_rate": 4.425229281232582e-07, "loss": 0.3654, "step": 14488 }, { "epoch": 2.1, "grad_norm": 7.7993621826171875, "learning_rate": 4.423915301221094e-07, "loss": 0.3404, "step": 14489 }, { "epoch": 2.1, "grad_norm": 7.824995994567871, "learning_rate": 4.4226014609093475e-07, "loss": 0.3599, "step": 14490 }, { "epoch": 2.1, "grad_norm": 7.38399600982666, "learning_rate": 4.421287760330259e-07, "loss": 0.336, "step": 14491 }, { "epoch": 2.1, "grad_norm": 8.110873222351074, "learning_rate": 4.4199741995167404e-07, "loss": 0.3313, "step": 14492 }, { "epoch": 2.1, "grad_norm": 9.778685569763184, "learning_rate": 4.418660778501707e-07, "loss": 0.3795, "step": 14493 }, { "epoch": 2.1, "grad_norm": 8.135430335998535, "learning_rate": 4.41734749731805e-07, "loss": 0.3482, "step": 14494 }, { "epoch": 2.1, "grad_norm": 9.59862232208252, "learning_rate": 4.416034355998689e-07, "loss": 0.3932, "step": 14495 }, { "epoch": 2.1, "grad_norm": 8.115028381347656, "learning_rate": 4.414721354576507e-07, "loss": 0.3658, "step": 14496 }, { "epoch": 2.1, "grad_norm": 8.83400821685791, "learning_rate": 4.413408493084414e-07, "loss": 0.3618, "step": 14497 }, { "epoch": 2.1, "grad_norm": 8.669193267822266, "learning_rate": 4.41209577155529e-07, "loss": 0.2912, "step": 14498 }, { "epoch": 2.1, "grad_norm": 9.485713958740234, "learning_rate": 4.410783190022026e-07, "loss": 0.3851, "step": 14499 }, { "epoch": 2.1, "grad_norm": 7.828882217407227, "learning_rate": 4.409470748517509e-07, "loss": 0.3407, "step": 14500 }, { "epoch": 2.1, "grad_norm": 8.402246475219727, "learning_rate": 4.408158447074618e-07, "loss": 0.3748, "step": 14501 }, { "epoch": 2.1, "grad_norm": 7.9799346923828125, "learning_rate": 4.4068462857262313e-07, "loss": 0.3223, "step": 14502 }, { "epoch": 2.1, "grad_norm": 8.120550155639648, "learning_rate": 4.405534264505223e-07, "loss": 0.3389, "step": 14503 }, { "epoch": 2.1, "grad_norm": 7.815695285797119, "learning_rate": 4.4042223834444636e-07, "loss": 0.3205, "step": 14504 }, { "epoch": 2.1, "grad_norm": 7.491930961608887, "learning_rate": 4.40291064257682e-07, "loss": 0.3373, "step": 14505 }, { "epoch": 2.1, "grad_norm": 7.426085948944092, "learning_rate": 4.4015990419351565e-07, "loss": 0.3669, "step": 14506 }, { "epoch": 2.1, "grad_norm": 7.6180033683776855, "learning_rate": 4.4002875815523366e-07, "loss": 0.367, "step": 14507 }, { "epoch": 2.11, "grad_norm": 7.37644100189209, "learning_rate": 4.3989762614612047e-07, "loss": 0.3293, "step": 14508 }, { "epoch": 2.11, "grad_norm": 8.157490730285645, "learning_rate": 4.3976650816946293e-07, "loss": 0.3524, "step": 14509 }, { "epoch": 2.11, "grad_norm": 8.313671112060547, "learning_rate": 4.396354042285445e-07, "loss": 0.3738, "step": 14510 }, { "epoch": 2.11, "grad_norm": 9.615882873535156, "learning_rate": 4.3950431432665123e-07, "loss": 0.4126, "step": 14511 }, { "epoch": 2.11, "grad_norm": 8.454299926757812, "learning_rate": 4.393732384670664e-07, "loss": 0.3452, "step": 14512 }, { "epoch": 2.11, "grad_norm": 7.63198709487915, "learning_rate": 4.3924217665307374e-07, "loss": 0.3172, "step": 14513 }, { "epoch": 2.11, "grad_norm": 7.448943614959717, "learning_rate": 4.3911112888795786e-07, "loss": 0.3264, "step": 14514 }, { "epoch": 2.11, "grad_norm": 7.5209527015686035, "learning_rate": 4.3898009517500066e-07, "loss": 0.366, "step": 14515 }, { "epoch": 2.11, "grad_norm": 9.102265357971191, "learning_rate": 4.388490755174863e-07, "loss": 0.3972, "step": 14516 }, { "epoch": 2.11, "grad_norm": 9.083877563476562, "learning_rate": 4.387180699186959e-07, "loss": 0.2891, "step": 14517 }, { "epoch": 2.11, "grad_norm": 8.7112398147583, "learning_rate": 4.385870783819129e-07, "loss": 0.4143, "step": 14518 }, { "epoch": 2.11, "grad_norm": 7.9871039390563965, "learning_rate": 4.384561009104182e-07, "loss": 0.3353, "step": 14519 }, { "epoch": 2.11, "grad_norm": 8.036941528320312, "learning_rate": 4.3832513750749335e-07, "loss": 0.3737, "step": 14520 }, { "epoch": 2.11, "grad_norm": 9.73240852355957, "learning_rate": 4.3819418817641963e-07, "loss": 0.4212, "step": 14521 }, { "epoch": 2.11, "grad_norm": 9.386029243469238, "learning_rate": 4.3806325292047773e-07, "loss": 0.4099, "step": 14522 }, { "epoch": 2.11, "grad_norm": 7.540764808654785, "learning_rate": 4.37932331742948e-07, "loss": 0.3412, "step": 14523 }, { "epoch": 2.11, "grad_norm": 8.003817558288574, "learning_rate": 4.3780142464711035e-07, "loss": 0.3438, "step": 14524 }, { "epoch": 2.11, "grad_norm": 7.471873760223389, "learning_rate": 4.3767053163624445e-07, "loss": 0.365, "step": 14525 }, { "epoch": 2.11, "grad_norm": 8.299500465393066, "learning_rate": 4.375396527136298e-07, "loss": 0.3843, "step": 14526 }, { "epoch": 2.11, "grad_norm": 8.28964900970459, "learning_rate": 4.374087878825452e-07, "loss": 0.3561, "step": 14527 }, { "epoch": 2.11, "grad_norm": 7.789240837097168, "learning_rate": 4.372779371462698e-07, "loss": 0.3498, "step": 14528 }, { "epoch": 2.11, "grad_norm": 7.479702949523926, "learning_rate": 4.3714710050808047e-07, "loss": 0.3605, "step": 14529 }, { "epoch": 2.11, "grad_norm": 7.89780330657959, "learning_rate": 4.3701627797125687e-07, "loss": 0.366, "step": 14530 }, { "epoch": 2.11, "grad_norm": 7.846365928649902, "learning_rate": 4.368854695390748e-07, "loss": 0.3766, "step": 14531 }, { "epoch": 2.11, "grad_norm": 8.749789237976074, "learning_rate": 4.367546752148131e-07, "loss": 0.3836, "step": 14532 }, { "epoch": 2.11, "grad_norm": 8.509474754333496, "learning_rate": 4.366238950017476e-07, "loss": 0.3559, "step": 14533 }, { "epoch": 2.11, "grad_norm": 9.250384330749512, "learning_rate": 4.3649312890315484e-07, "loss": 0.3404, "step": 14534 }, { "epoch": 2.11, "grad_norm": 7.464731693267822, "learning_rate": 4.3636237692231116e-07, "loss": 0.3604, "step": 14535 }, { "epoch": 2.11, "grad_norm": 9.14901351928711, "learning_rate": 4.362316390624924e-07, "loss": 0.3907, "step": 14536 }, { "epoch": 2.11, "grad_norm": 7.640432357788086, "learning_rate": 4.361009153269738e-07, "loss": 0.3586, "step": 14537 }, { "epoch": 2.11, "grad_norm": 7.8458781242370605, "learning_rate": 4.3597020571903066e-07, "loss": 0.3385, "step": 14538 }, { "epoch": 2.11, "grad_norm": 8.120017051696777, "learning_rate": 4.358395102419374e-07, "loss": 0.3324, "step": 14539 }, { "epoch": 2.11, "grad_norm": 7.807076454162598, "learning_rate": 4.357088288989686e-07, "loss": 0.3674, "step": 14540 }, { "epoch": 2.11, "grad_norm": 9.220263481140137, "learning_rate": 4.3557816169339836e-07, "loss": 0.3873, "step": 14541 }, { "epoch": 2.11, "grad_norm": 8.927095413208008, "learning_rate": 4.354475086285e-07, "loss": 0.3916, "step": 14542 }, { "epoch": 2.11, "grad_norm": 7.268760681152344, "learning_rate": 4.353168697075471e-07, "loss": 0.323, "step": 14543 }, { "epoch": 2.11, "grad_norm": 8.421134948730469, "learning_rate": 4.3518624493381253e-07, "loss": 0.3554, "step": 14544 }, { "epoch": 2.11, "grad_norm": 9.434391975402832, "learning_rate": 4.350556343105688e-07, "loss": 0.3228, "step": 14545 }, { "epoch": 2.11, "grad_norm": 7.476638317108154, "learning_rate": 4.3492503784108825e-07, "loss": 0.3385, "step": 14546 }, { "epoch": 2.11, "grad_norm": 7.9526543617248535, "learning_rate": 4.347944555286431e-07, "loss": 0.3691, "step": 14547 }, { "epoch": 2.11, "grad_norm": 7.560008525848389, "learning_rate": 4.3466388737650373e-07, "loss": 0.3823, "step": 14548 }, { "epoch": 2.11, "grad_norm": 7.884975910186768, "learning_rate": 4.3453333338794287e-07, "loss": 0.3548, "step": 14549 }, { "epoch": 2.11, "grad_norm": 9.67233943939209, "learning_rate": 4.3440279356622987e-07, "loss": 0.4082, "step": 14550 }, { "epoch": 2.11, "grad_norm": 8.337238311767578, "learning_rate": 4.3427226791463655e-07, "loss": 0.3341, "step": 14551 }, { "epoch": 2.11, "grad_norm": 7.477940082550049, "learning_rate": 4.3414175643643157e-07, "loss": 0.3565, "step": 14552 }, { "epoch": 2.11, "grad_norm": 7.649478435516357, "learning_rate": 4.340112591348862e-07, "loss": 0.2923, "step": 14553 }, { "epoch": 2.11, "grad_norm": 7.57889986038208, "learning_rate": 4.338807760132688e-07, "loss": 0.3228, "step": 14554 }, { "epoch": 2.11, "grad_norm": 8.36750602722168, "learning_rate": 4.3375030707484873e-07, "loss": 0.3339, "step": 14555 }, { "epoch": 2.11, "grad_norm": 8.846211433410645, "learning_rate": 4.3361985232289455e-07, "loss": 0.3984, "step": 14556 }, { "epoch": 2.11, "grad_norm": 9.179267883300781, "learning_rate": 4.334894117606748e-07, "loss": 0.3735, "step": 14557 }, { "epoch": 2.11, "grad_norm": 8.811248779296875, "learning_rate": 4.333589853914572e-07, "loss": 0.3939, "step": 14558 }, { "epoch": 2.11, "grad_norm": 8.613959312438965, "learning_rate": 4.332285732185097e-07, "loss": 0.3465, "step": 14559 }, { "epoch": 2.11, "grad_norm": 9.338054656982422, "learning_rate": 4.330981752450993e-07, "loss": 0.3492, "step": 14560 }, { "epoch": 2.11, "grad_norm": 9.435136795043945, "learning_rate": 4.32967791474493e-07, "loss": 0.4043, "step": 14561 }, { "epoch": 2.11, "grad_norm": 7.670454978942871, "learning_rate": 4.328374219099574e-07, "loss": 0.3622, "step": 14562 }, { "epoch": 2.11, "grad_norm": 8.087409019470215, "learning_rate": 4.3270706655475895e-07, "loss": 0.3845, "step": 14563 }, { "epoch": 2.11, "grad_norm": 7.607329845428467, "learning_rate": 4.3257672541216264e-07, "loss": 0.3183, "step": 14564 }, { "epoch": 2.11, "grad_norm": 7.937872886657715, "learning_rate": 4.3244639848543474e-07, "loss": 0.3338, "step": 14565 }, { "epoch": 2.11, "grad_norm": 8.71151065826416, "learning_rate": 4.323160857778401e-07, "loss": 0.361, "step": 14566 }, { "epoch": 2.11, "grad_norm": 8.117640495300293, "learning_rate": 4.3218578729264355e-07, "loss": 0.3447, "step": 14567 }, { "epoch": 2.11, "grad_norm": 7.9434661865234375, "learning_rate": 4.320555030331099e-07, "loss": 0.3193, "step": 14568 }, { "epoch": 2.11, "grad_norm": 9.384815216064453, "learning_rate": 4.319252330025022e-07, "loss": 0.4164, "step": 14569 }, { "epoch": 2.11, "grad_norm": 8.325861930847168, "learning_rate": 4.317949772040853e-07, "loss": 0.3306, "step": 14570 }, { "epoch": 2.11, "grad_norm": 9.460691452026367, "learning_rate": 4.3166473564112137e-07, "loss": 0.3995, "step": 14571 }, { "epoch": 2.11, "grad_norm": 8.170035362243652, "learning_rate": 4.3153450831687476e-07, "loss": 0.3781, "step": 14572 }, { "epoch": 2.11, "grad_norm": 8.0672607421875, "learning_rate": 4.3140429523460666e-07, "loss": 0.3398, "step": 14573 }, { "epoch": 2.11, "grad_norm": 8.755548477172852, "learning_rate": 4.3127409639758086e-07, "loss": 0.3579, "step": 14574 }, { "epoch": 2.11, "grad_norm": 7.873870849609375, "learning_rate": 4.3114391180905795e-07, "loss": 0.3496, "step": 14575 }, { "epoch": 2.11, "grad_norm": 9.146810531616211, "learning_rate": 4.310137414723002e-07, "loss": 0.4179, "step": 14576 }, { "epoch": 2.12, "grad_norm": 7.420720100402832, "learning_rate": 4.3088358539056857e-07, "loss": 0.3096, "step": 14577 }, { "epoch": 2.12, "grad_norm": 7.873804569244385, "learning_rate": 4.3075344356712404e-07, "loss": 0.3777, "step": 14578 }, { "epoch": 2.12, "grad_norm": 8.2295503616333, "learning_rate": 4.306233160052271e-07, "loss": 0.2984, "step": 14579 }, { "epoch": 2.12, "grad_norm": 8.754718780517578, "learning_rate": 4.304932027081377e-07, "loss": 0.3802, "step": 14580 }, { "epoch": 2.12, "grad_norm": 7.991901397705078, "learning_rate": 4.303631036791159e-07, "loss": 0.3797, "step": 14581 }, { "epoch": 2.12, "grad_norm": 8.599610328674316, "learning_rate": 4.302330189214208e-07, "loss": 0.4186, "step": 14582 }, { "epoch": 2.12, "grad_norm": 7.354522228240967, "learning_rate": 4.301029484383117e-07, "loss": 0.3079, "step": 14583 }, { "epoch": 2.12, "grad_norm": 8.948873519897461, "learning_rate": 4.2997289223304766e-07, "loss": 0.372, "step": 14584 }, { "epoch": 2.12, "grad_norm": 7.06471586227417, "learning_rate": 4.298428503088859e-07, "loss": 0.3456, "step": 14585 }, { "epoch": 2.12, "grad_norm": 9.832112312316895, "learning_rate": 4.297128226690858e-07, "loss": 0.451, "step": 14586 }, { "epoch": 2.12, "grad_norm": 8.160608291625977, "learning_rate": 4.295828093169036e-07, "loss": 0.3607, "step": 14587 }, { "epoch": 2.12, "grad_norm": 8.125038146972656, "learning_rate": 4.2945281025559796e-07, "loss": 0.3167, "step": 14588 }, { "epoch": 2.12, "grad_norm": 7.787335395812988, "learning_rate": 4.293228254884249e-07, "loss": 0.348, "step": 14589 }, { "epoch": 2.12, "grad_norm": 8.755270004272461, "learning_rate": 4.291928550186411e-07, "loss": 0.3851, "step": 14590 }, { "epoch": 2.12, "grad_norm": 7.376134872436523, "learning_rate": 4.2906289884950277e-07, "loss": 0.3233, "step": 14591 }, { "epoch": 2.12, "grad_norm": 8.277974128723145, "learning_rate": 4.2893295698426557e-07, "loss": 0.3764, "step": 14592 }, { "epoch": 2.12, "grad_norm": 7.702546119689941, "learning_rate": 4.2880302942618596e-07, "loss": 0.3046, "step": 14593 }, { "epoch": 2.12, "grad_norm": 7.60469388961792, "learning_rate": 4.28673116178518e-07, "loss": 0.3256, "step": 14594 }, { "epoch": 2.12, "grad_norm": 9.25832748413086, "learning_rate": 4.285432172445167e-07, "loss": 0.4208, "step": 14595 }, { "epoch": 2.12, "grad_norm": 8.703283309936523, "learning_rate": 4.284133326274367e-07, "loss": 0.3446, "step": 14596 }, { "epoch": 2.12, "grad_norm": 9.676830291748047, "learning_rate": 4.282834623305318e-07, "loss": 0.4099, "step": 14597 }, { "epoch": 2.12, "grad_norm": 9.454606056213379, "learning_rate": 4.28153606357056e-07, "loss": 0.3854, "step": 14598 }, { "epoch": 2.12, "grad_norm": 9.503666877746582, "learning_rate": 4.2802376471026226e-07, "loss": 0.3841, "step": 14599 }, { "epoch": 2.12, "grad_norm": 8.269749641418457, "learning_rate": 4.2789393739340386e-07, "loss": 0.3508, "step": 14600 }, { "epoch": 2.12, "grad_norm": 9.180020332336426, "learning_rate": 4.277641244097332e-07, "loss": 0.4098, "step": 14601 }, { "epoch": 2.12, "grad_norm": 10.437551498413086, "learning_rate": 4.276343257625026e-07, "loss": 0.3762, "step": 14602 }, { "epoch": 2.12, "grad_norm": 8.719805717468262, "learning_rate": 4.275045414549644e-07, "loss": 0.4112, "step": 14603 }, { "epoch": 2.12, "grad_norm": 7.5646162033081055, "learning_rate": 4.273747714903689e-07, "loss": 0.3549, "step": 14604 }, { "epoch": 2.12, "grad_norm": 8.777728080749512, "learning_rate": 4.2724501587196884e-07, "loss": 0.4177, "step": 14605 }, { "epoch": 2.12, "grad_norm": 7.9811482429504395, "learning_rate": 4.2711527460301354e-07, "loss": 0.2611, "step": 14606 }, { "epoch": 2.12, "grad_norm": 8.2772798538208, "learning_rate": 4.2698554768675497e-07, "loss": 0.364, "step": 14607 }, { "epoch": 2.12, "grad_norm": 7.950357913970947, "learning_rate": 4.268558351264417e-07, "loss": 0.3396, "step": 14608 }, { "epoch": 2.12, "grad_norm": 7.65537691116333, "learning_rate": 4.26726136925325e-07, "loss": 0.3227, "step": 14609 }, { "epoch": 2.12, "grad_norm": 8.668229103088379, "learning_rate": 4.26596453086653e-07, "loss": 0.3774, "step": 14610 }, { "epoch": 2.12, "grad_norm": 8.507378578186035, "learning_rate": 4.264667836136753e-07, "loss": 0.3796, "step": 14611 }, { "epoch": 2.12, "grad_norm": 7.951926231384277, "learning_rate": 4.2633712850964046e-07, "loss": 0.3603, "step": 14612 }, { "epoch": 2.12, "grad_norm": 8.570858001708984, "learning_rate": 4.2620748777779657e-07, "loss": 0.3407, "step": 14613 }, { "epoch": 2.12, "grad_norm": 7.6219964027404785, "learning_rate": 4.260778614213919e-07, "loss": 0.3114, "step": 14614 }, { "epoch": 2.12, "grad_norm": 8.627161026000977, "learning_rate": 4.259482494436739e-07, "loss": 0.3947, "step": 14615 }, { "epoch": 2.12, "grad_norm": 8.435995101928711, "learning_rate": 4.258186518478897e-07, "loss": 0.3654, "step": 14616 }, { "epoch": 2.12, "grad_norm": 7.2029619216918945, "learning_rate": 4.2568906863728626e-07, "loss": 0.335, "step": 14617 }, { "epoch": 2.12, "grad_norm": 8.124236106872559, "learning_rate": 4.2555949981511006e-07, "loss": 0.349, "step": 14618 }, { "epoch": 2.12, "grad_norm": 8.727893829345703, "learning_rate": 4.254299453846072e-07, "loss": 0.4378, "step": 14619 }, { "epoch": 2.12, "grad_norm": 8.724455833435059, "learning_rate": 4.2530040534902354e-07, "loss": 0.3783, "step": 14620 }, { "epoch": 2.12, "grad_norm": 8.686087608337402, "learning_rate": 4.2517087971160434e-07, "loss": 0.3833, "step": 14621 }, { "epoch": 2.12, "grad_norm": 8.016557693481445, "learning_rate": 4.250413684755949e-07, "loss": 0.353, "step": 14622 }, { "epoch": 2.12, "grad_norm": 8.017481803894043, "learning_rate": 4.2491187164423957e-07, "loss": 0.3813, "step": 14623 }, { "epoch": 2.12, "grad_norm": 8.088509559631348, "learning_rate": 4.247823892207834e-07, "loss": 0.3577, "step": 14624 }, { "epoch": 2.12, "grad_norm": 7.9776105880737305, "learning_rate": 4.246529212084691e-07, "loss": 0.3585, "step": 14625 }, { "epoch": 2.12, "grad_norm": 8.322118759155273, "learning_rate": 4.245234676105416e-07, "loss": 0.3461, "step": 14626 }, { "epoch": 2.12, "grad_norm": 7.533565998077393, "learning_rate": 4.24394028430243e-07, "loss": 0.3349, "step": 14627 }, { "epoch": 2.12, "grad_norm": 7.734140396118164, "learning_rate": 4.242646036708174e-07, "loss": 0.3493, "step": 14628 }, { "epoch": 2.12, "grad_norm": 7.394440650939941, "learning_rate": 4.2413519333550596e-07, "loss": 0.3483, "step": 14629 }, { "epoch": 2.12, "grad_norm": 7.5951385498046875, "learning_rate": 4.2400579742755226e-07, "loss": 0.3553, "step": 14630 }, { "epoch": 2.12, "grad_norm": 8.5651216506958, "learning_rate": 4.238764159501972e-07, "loss": 0.3351, "step": 14631 }, { "epoch": 2.12, "grad_norm": 8.821643829345703, "learning_rate": 4.2374704890668234e-07, "loss": 0.3511, "step": 14632 }, { "epoch": 2.12, "grad_norm": 10.778118133544922, "learning_rate": 4.236176963002488e-07, "loss": 0.3205, "step": 14633 }, { "epoch": 2.12, "grad_norm": 7.176661968231201, "learning_rate": 4.234883581341374e-07, "loss": 0.3364, "step": 14634 }, { "epoch": 2.12, "grad_norm": 7.975672245025635, "learning_rate": 4.233590344115885e-07, "loss": 0.3349, "step": 14635 }, { "epoch": 2.12, "grad_norm": 8.735397338867188, "learning_rate": 4.23229725135842e-07, "loss": 0.3791, "step": 14636 }, { "epoch": 2.12, "grad_norm": 7.7819647789001465, "learning_rate": 4.231004303101375e-07, "loss": 0.3633, "step": 14637 }, { "epoch": 2.12, "grad_norm": 7.245355606079102, "learning_rate": 4.2297114993771457e-07, "loss": 0.3361, "step": 14638 }, { "epoch": 2.12, "grad_norm": 12.160775184631348, "learning_rate": 4.2284188402181173e-07, "loss": 0.4273, "step": 14639 }, { "epoch": 2.12, "grad_norm": 8.78148365020752, "learning_rate": 4.2271263256566816e-07, "loss": 0.3647, "step": 14640 }, { "epoch": 2.12, "grad_norm": 8.67816162109375, "learning_rate": 4.225833955725208e-07, "loss": 0.3933, "step": 14641 }, { "epoch": 2.12, "grad_norm": 8.911090850830078, "learning_rate": 4.2245417304560904e-07, "loss": 0.3716, "step": 14642 }, { "epoch": 2.12, "grad_norm": 9.749373435974121, "learning_rate": 4.2232496498816885e-07, "loss": 0.4008, "step": 14643 }, { "epoch": 2.12, "grad_norm": 7.573893070220947, "learning_rate": 4.221957714034383e-07, "loss": 0.3173, "step": 14644 }, { "epoch": 2.12, "grad_norm": 8.45238971710205, "learning_rate": 4.220665922946542e-07, "loss": 0.3533, "step": 14645 }, { "epoch": 2.13, "grad_norm": 8.040172576904297, "learning_rate": 4.21937427665052e-07, "loss": 0.3772, "step": 14646 }, { "epoch": 2.13, "grad_norm": 8.856367111206055, "learning_rate": 4.2180827751786894e-07, "loss": 0.3819, "step": 14647 }, { "epoch": 2.13, "grad_norm": 7.967570781707764, "learning_rate": 4.2167914185633936e-07, "loss": 0.359, "step": 14648 }, { "epoch": 2.13, "grad_norm": 8.273365020751953, "learning_rate": 4.2155002068369993e-07, "loss": 0.3471, "step": 14649 }, { "epoch": 2.13, "grad_norm": 8.377366065979004, "learning_rate": 4.214209140031845e-07, "loss": 0.3174, "step": 14650 }, { "epoch": 2.13, "grad_norm": 9.240842819213867, "learning_rate": 4.212918218180279e-07, "loss": 0.3686, "step": 14651 }, { "epoch": 2.13, "grad_norm": 8.232799530029297, "learning_rate": 4.211627441314645e-07, "loss": 0.3736, "step": 14652 }, { "epoch": 2.13, "grad_norm": 8.416891098022461, "learning_rate": 4.2103368094672784e-07, "loss": 0.3141, "step": 14653 }, { "epoch": 2.13, "grad_norm": 7.293857097625732, "learning_rate": 4.2090463226705174e-07, "loss": 0.3329, "step": 14654 }, { "epoch": 2.13, "grad_norm": 8.975468635559082, "learning_rate": 4.207755980956692e-07, "loss": 0.396, "step": 14655 }, { "epoch": 2.13, "grad_norm": 8.022114753723145, "learning_rate": 4.2064657843581277e-07, "loss": 0.3362, "step": 14656 }, { "epoch": 2.13, "grad_norm": 7.776823997497559, "learning_rate": 4.2051757329071504e-07, "loss": 0.3451, "step": 14657 }, { "epoch": 2.13, "grad_norm": 8.78943920135498, "learning_rate": 4.20388582663608e-07, "loss": 0.3942, "step": 14658 }, { "epoch": 2.13, "grad_norm": 8.149847030639648, "learning_rate": 4.2025960655772353e-07, "loss": 0.3545, "step": 14659 }, { "epoch": 2.13, "grad_norm": 8.683802604675293, "learning_rate": 4.201306449762919e-07, "loss": 0.3651, "step": 14660 }, { "epoch": 2.13, "grad_norm": 8.374483108520508, "learning_rate": 4.200016979225456e-07, "loss": 0.3507, "step": 14661 }, { "epoch": 2.13, "grad_norm": 8.277859687805176, "learning_rate": 4.198727653997135e-07, "loss": 0.4244, "step": 14662 }, { "epoch": 2.13, "grad_norm": 8.637831687927246, "learning_rate": 4.197438474110275e-07, "loss": 0.3705, "step": 14663 }, { "epoch": 2.13, "grad_norm": 7.808871269226074, "learning_rate": 4.1961494395971577e-07, "loss": 0.3442, "step": 14664 }, { "epoch": 2.13, "grad_norm": 7.40403413772583, "learning_rate": 4.1948605504900937e-07, "loss": 0.2868, "step": 14665 }, { "epoch": 2.13, "grad_norm": 11.346149444580078, "learning_rate": 4.1935718068213623e-07, "loss": 0.4361, "step": 14666 }, { "epoch": 2.13, "grad_norm": 10.28649616241455, "learning_rate": 4.192283208623255e-07, "loss": 0.368, "step": 14667 }, { "epoch": 2.13, "grad_norm": 8.437009811401367, "learning_rate": 4.190994755928056e-07, "loss": 0.3523, "step": 14668 }, { "epoch": 2.13, "grad_norm": 7.73491907119751, "learning_rate": 4.1897064487680436e-07, "loss": 0.3645, "step": 14669 }, { "epoch": 2.13, "grad_norm": 8.206748008728027, "learning_rate": 4.188418287175496e-07, "loss": 0.3609, "step": 14670 }, { "epoch": 2.13, "grad_norm": 10.503137588500977, "learning_rate": 4.187130271182685e-07, "loss": 0.3971, "step": 14671 }, { "epoch": 2.13, "grad_norm": 9.248594284057617, "learning_rate": 4.185842400821881e-07, "loss": 0.4067, "step": 14672 }, { "epoch": 2.13, "grad_norm": 7.991621017456055, "learning_rate": 4.184554676125348e-07, "loss": 0.3513, "step": 14673 }, { "epoch": 2.13, "grad_norm": 9.318368911743164, "learning_rate": 4.183267097125349e-07, "loss": 0.3373, "step": 14674 }, { "epoch": 2.13, "grad_norm": 10.091848373413086, "learning_rate": 4.18197966385414e-07, "loss": 0.4202, "step": 14675 }, { "epoch": 2.13, "grad_norm": 8.704919815063477, "learning_rate": 4.1806923763439795e-07, "loss": 0.3813, "step": 14676 }, { "epoch": 2.13, "grad_norm": 7.929839134216309, "learning_rate": 4.179405234627115e-07, "loss": 0.4117, "step": 14677 }, { "epoch": 2.13, "grad_norm": 8.656495094299316, "learning_rate": 4.178118238735795e-07, "loss": 0.3844, "step": 14678 }, { "epoch": 2.13, "grad_norm": 8.092351913452148, "learning_rate": 4.176831388702263e-07, "loss": 0.3654, "step": 14679 }, { "epoch": 2.13, "grad_norm": 9.786687850952148, "learning_rate": 4.175544684558763e-07, "loss": 0.4108, "step": 14680 }, { "epoch": 2.13, "grad_norm": 7.482553005218506, "learning_rate": 4.1742581263375185e-07, "loss": 0.3564, "step": 14681 }, { "epoch": 2.13, "grad_norm": 9.668020248413086, "learning_rate": 4.1729717140707797e-07, "loss": 0.3778, "step": 14682 }, { "epoch": 2.13, "grad_norm": 8.510744094848633, "learning_rate": 4.171685447790758e-07, "loss": 0.361, "step": 14683 }, { "epoch": 2.13, "grad_norm": 8.223657608032227, "learning_rate": 4.1703993275296957e-07, "loss": 0.3358, "step": 14684 }, { "epoch": 2.13, "grad_norm": 8.731409072875977, "learning_rate": 4.169113353319802e-07, "loss": 0.3991, "step": 14685 }, { "epoch": 2.13, "grad_norm": 8.866018295288086, "learning_rate": 4.167827525193299e-07, "loss": 0.3553, "step": 14686 }, { "epoch": 2.13, "grad_norm": 8.042488098144531, "learning_rate": 4.166541843182402e-07, "loss": 0.3491, "step": 14687 }, { "epoch": 2.13, "grad_norm": 8.896170616149902, "learning_rate": 4.1652563073193204e-07, "loss": 0.3678, "step": 14688 }, { "epoch": 2.13, "grad_norm": 7.713598728179932, "learning_rate": 4.16397091763626e-07, "loss": 0.3565, "step": 14689 }, { "epoch": 2.13, "grad_norm": 8.639451026916504, "learning_rate": 4.162685674165427e-07, "loss": 0.3741, "step": 14690 }, { "epoch": 2.13, "grad_norm": 7.278616428375244, "learning_rate": 4.1614005769390206e-07, "loss": 0.3231, "step": 14691 }, { "epoch": 2.13, "grad_norm": 8.412583351135254, "learning_rate": 4.160115625989234e-07, "loss": 0.3698, "step": 14692 }, { "epoch": 2.13, "grad_norm": 7.715758323669434, "learning_rate": 4.1588308213482636e-07, "loss": 0.3369, "step": 14693 }, { "epoch": 2.13, "grad_norm": 8.499119758605957, "learning_rate": 4.1575461630482986e-07, "loss": 0.4209, "step": 14694 }, { "epoch": 2.13, "grad_norm": 10.516761779785156, "learning_rate": 4.1562616511215143e-07, "loss": 0.3988, "step": 14695 }, { "epoch": 2.13, "grad_norm": 8.396758079528809, "learning_rate": 4.154977285600103e-07, "loss": 0.3718, "step": 14696 }, { "epoch": 2.13, "grad_norm": 8.577630043029785, "learning_rate": 4.1536930665162384e-07, "loss": 0.3727, "step": 14697 }, { "epoch": 2.13, "grad_norm": 7.183276653289795, "learning_rate": 4.152408993902096e-07, "loss": 0.2961, "step": 14698 }, { "epoch": 2.13, "grad_norm": 7.592381954193115, "learning_rate": 4.1511250677898433e-07, "loss": 0.3692, "step": 14699 }, { "epoch": 2.13, "grad_norm": 7.692564964294434, "learning_rate": 4.1498412882116495e-07, "loss": 0.351, "step": 14700 }, { "epoch": 2.13, "grad_norm": 8.00583553314209, "learning_rate": 4.1485576551996805e-07, "loss": 0.3816, "step": 14701 }, { "epoch": 2.13, "grad_norm": 8.737582206726074, "learning_rate": 4.1472741687860833e-07, "loss": 0.3276, "step": 14702 }, { "epoch": 2.13, "grad_norm": 8.4401216506958, "learning_rate": 4.145990829003031e-07, "loss": 0.3519, "step": 14703 }, { "epoch": 2.13, "grad_norm": 8.29846477508545, "learning_rate": 4.144707635882658e-07, "loss": 0.3885, "step": 14704 }, { "epoch": 2.13, "grad_norm": 8.783164978027344, "learning_rate": 4.1434245894571293e-07, "loss": 0.3289, "step": 14705 }, { "epoch": 2.13, "grad_norm": 8.936992645263672, "learning_rate": 4.142141689758578e-07, "loss": 0.3448, "step": 14706 }, { "epoch": 2.13, "grad_norm": 6.9618988037109375, "learning_rate": 4.1408589368191494e-07, "loss": 0.3213, "step": 14707 }, { "epoch": 2.13, "grad_norm": 9.974576950073242, "learning_rate": 4.1395763306709785e-07, "loss": 0.4161, "step": 14708 }, { "epoch": 2.13, "grad_norm": 7.7457404136657715, "learning_rate": 4.138293871346201e-07, "loss": 0.3694, "step": 14709 }, { "epoch": 2.13, "grad_norm": 9.395254135131836, "learning_rate": 4.137011558876947e-07, "loss": 0.3986, "step": 14710 }, { "epoch": 2.13, "grad_norm": 8.693521499633789, "learning_rate": 4.135729393295342e-07, "loss": 0.3356, "step": 14711 }, { "epoch": 2.13, "grad_norm": 9.357559204101562, "learning_rate": 4.1344473746335075e-07, "loss": 0.3476, "step": 14712 }, { "epoch": 2.13, "grad_norm": 7.893328666687012, "learning_rate": 4.1331655029235647e-07, "loss": 0.3111, "step": 14713 }, { "epoch": 2.13, "grad_norm": 8.430645942687988, "learning_rate": 4.1318837781976267e-07, "loss": 0.3225, "step": 14714 }, { "epoch": 2.14, "grad_norm": 6.954765319824219, "learning_rate": 4.130602200487809e-07, "loss": 0.3338, "step": 14715 }, { "epoch": 2.14, "grad_norm": 8.29349422454834, "learning_rate": 4.129320769826211e-07, "loss": 0.386, "step": 14716 }, { "epoch": 2.14, "grad_norm": 8.236968040466309, "learning_rate": 4.1280394862449486e-07, "loss": 0.3961, "step": 14717 }, { "epoch": 2.14, "grad_norm": 8.115386962890625, "learning_rate": 4.1267583497761085e-07, "loss": 0.3298, "step": 14718 }, { "epoch": 2.14, "grad_norm": 7.742453575134277, "learning_rate": 4.125477360451802e-07, "loss": 0.3667, "step": 14719 }, { "epoch": 2.14, "grad_norm": 7.808334827423096, "learning_rate": 4.124196518304108e-07, "loss": 0.3616, "step": 14720 }, { "epoch": 2.14, "grad_norm": 7.357170104980469, "learning_rate": 4.122915823365131e-07, "loss": 0.3578, "step": 14721 }, { "epoch": 2.14, "grad_norm": 9.528654098510742, "learning_rate": 4.1216352756669446e-07, "loss": 0.3761, "step": 14722 }, { "epoch": 2.14, "grad_norm": 8.908805847167969, "learning_rate": 4.1203548752416317e-07, "loss": 0.3625, "step": 14723 }, { "epoch": 2.14, "grad_norm": 8.503680229187012, "learning_rate": 4.119074622121281e-07, "loss": 0.3578, "step": 14724 }, { "epoch": 2.14, "grad_norm": 9.243169784545898, "learning_rate": 4.1177945163379533e-07, "loss": 0.4828, "step": 14725 }, { "epoch": 2.14, "grad_norm": 8.499744415283203, "learning_rate": 4.1165145579237327e-07, "loss": 0.3241, "step": 14726 }, { "epoch": 2.14, "grad_norm": 8.960251808166504, "learning_rate": 4.1152347469106774e-07, "loss": 0.3868, "step": 14727 }, { "epoch": 2.14, "grad_norm": 7.232157230377197, "learning_rate": 4.1139550833308533e-07, "loss": 0.319, "step": 14728 }, { "epoch": 2.14, "grad_norm": 9.31412124633789, "learning_rate": 4.1126755672163213e-07, "loss": 0.39, "step": 14729 }, { "epoch": 2.14, "grad_norm": 7.768002033233643, "learning_rate": 4.1113961985991373e-07, "loss": 0.3256, "step": 14730 }, { "epoch": 2.14, "grad_norm": 8.346327781677246, "learning_rate": 4.110116977511353e-07, "loss": 0.3674, "step": 14731 }, { "epoch": 2.14, "grad_norm": 8.93051815032959, "learning_rate": 4.1088379039850176e-07, "loss": 0.3038, "step": 14732 }, { "epoch": 2.14, "grad_norm": 9.438146591186523, "learning_rate": 4.107558978052177e-07, "loss": 0.3545, "step": 14733 }, { "epoch": 2.14, "grad_norm": 8.422022819519043, "learning_rate": 4.1062801997448714e-07, "loss": 0.3206, "step": 14734 }, { "epoch": 2.14, "grad_norm": 7.927138805389404, "learning_rate": 4.1050015690951387e-07, "loss": 0.355, "step": 14735 }, { "epoch": 2.14, "grad_norm": 8.160672187805176, "learning_rate": 4.1037230861350183e-07, "loss": 0.3749, "step": 14736 }, { "epoch": 2.14, "grad_norm": 8.995980262756348, "learning_rate": 4.1024447508965275e-07, "loss": 0.3451, "step": 14737 }, { "epoch": 2.14, "grad_norm": 8.587261199951172, "learning_rate": 4.1011665634117075e-07, "loss": 0.3872, "step": 14738 }, { "epoch": 2.14, "grad_norm": 7.9335808753967285, "learning_rate": 4.099888523712569e-07, "loss": 0.2856, "step": 14739 }, { "epoch": 2.14, "grad_norm": 8.629772186279297, "learning_rate": 4.098610631831143e-07, "loss": 0.3661, "step": 14740 }, { "epoch": 2.14, "grad_norm": 7.314548492431641, "learning_rate": 4.097332887799435e-07, "loss": 0.3421, "step": 14741 }, { "epoch": 2.14, "grad_norm": 8.385640144348145, "learning_rate": 4.096055291649462e-07, "loss": 0.3184, "step": 14742 }, { "epoch": 2.14, "grad_norm": 8.259275436401367, "learning_rate": 4.09477784341323e-07, "loss": 0.4052, "step": 14743 }, { "epoch": 2.14, "grad_norm": 8.074363708496094, "learning_rate": 4.0935005431227445e-07, "loss": 0.3703, "step": 14744 }, { "epoch": 2.14, "grad_norm": 7.840977191925049, "learning_rate": 4.0922233908100046e-07, "loss": 0.3082, "step": 14745 }, { "epoch": 2.14, "grad_norm": 6.916173458099365, "learning_rate": 4.0909463865070094e-07, "loss": 0.3204, "step": 14746 }, { "epoch": 2.14, "grad_norm": 7.924381732940674, "learning_rate": 4.089669530245752e-07, "loss": 0.3953, "step": 14747 }, { "epoch": 2.14, "grad_norm": 7.853816986083984, "learning_rate": 4.0883928220582195e-07, "loss": 0.364, "step": 14748 }, { "epoch": 2.14, "grad_norm": 8.531611442565918, "learning_rate": 4.087116261976401e-07, "loss": 0.3338, "step": 14749 }, { "epoch": 2.14, "grad_norm": 8.282219886779785, "learning_rate": 4.0858398500322765e-07, "loss": 0.3773, "step": 14750 }, { "epoch": 2.14, "grad_norm": 8.978926658630371, "learning_rate": 4.0845635862578256e-07, "loss": 0.3335, "step": 14751 }, { "epoch": 2.14, "grad_norm": 7.805586338043213, "learning_rate": 4.0832874706850216e-07, "loss": 0.389, "step": 14752 }, { "epoch": 2.14, "grad_norm": 8.103378295898438, "learning_rate": 4.082011503345837e-07, "loss": 0.3974, "step": 14753 }, { "epoch": 2.14, "grad_norm": 7.922877311706543, "learning_rate": 4.0807356842722383e-07, "loss": 0.3362, "step": 14754 }, { "epoch": 2.14, "grad_norm": 7.604578971862793, "learning_rate": 4.079460013496189e-07, "loss": 0.3188, "step": 14755 }, { "epoch": 2.14, "grad_norm": 7.395714282989502, "learning_rate": 4.07818449104965e-07, "loss": 0.3112, "step": 14756 }, { "epoch": 2.14, "grad_norm": 7.032092571258545, "learning_rate": 4.0769091169645785e-07, "loss": 0.3102, "step": 14757 }, { "epoch": 2.14, "grad_norm": 8.476692199707031, "learning_rate": 4.0756338912729194e-07, "loss": 0.4101, "step": 14758 }, { "epoch": 2.14, "grad_norm": 8.064335823059082, "learning_rate": 4.074358814006634e-07, "loss": 0.358, "step": 14759 }, { "epoch": 2.14, "grad_norm": 8.253223419189453, "learning_rate": 4.0730838851976534e-07, "loss": 0.3653, "step": 14760 }, { "epoch": 2.14, "grad_norm": 9.258742332458496, "learning_rate": 4.0718091048779324e-07, "loss": 0.3689, "step": 14761 }, { "epoch": 2.14, "grad_norm": 7.87493371963501, "learning_rate": 4.070534473079399e-07, "loss": 0.3396, "step": 14762 }, { "epoch": 2.14, "grad_norm": 7.7500691413879395, "learning_rate": 4.0692599898339895e-07, "loss": 0.3307, "step": 14763 }, { "epoch": 2.14, "grad_norm": 8.586445808410645, "learning_rate": 4.067985655173635e-07, "loss": 0.3954, "step": 14764 }, { "epoch": 2.14, "grad_norm": 8.03287124633789, "learning_rate": 4.066711469130261e-07, "loss": 0.355, "step": 14765 }, { "epoch": 2.14, "grad_norm": 8.98050594329834, "learning_rate": 4.06543743173579e-07, "loss": 0.3351, "step": 14766 }, { "epoch": 2.14, "grad_norm": 8.993037223815918, "learning_rate": 4.0641635430221423e-07, "loss": 0.4109, "step": 14767 }, { "epoch": 2.14, "grad_norm": 8.795140266418457, "learning_rate": 4.0628898030212323e-07, "loss": 0.3327, "step": 14768 }, { "epoch": 2.14, "grad_norm": 8.41522216796875, "learning_rate": 4.061616211764971e-07, "loss": 0.366, "step": 14769 }, { "epoch": 2.14, "grad_norm": 8.773541450500488, "learning_rate": 4.0603427692852677e-07, "loss": 0.3051, "step": 14770 }, { "epoch": 2.14, "grad_norm": 8.004424095153809, "learning_rate": 4.0590694756140275e-07, "loss": 0.3785, "step": 14771 }, { "epoch": 2.14, "grad_norm": 8.278870582580566, "learning_rate": 4.057796330783143e-07, "loss": 0.3718, "step": 14772 }, { "epoch": 2.14, "grad_norm": 7.22877836227417, "learning_rate": 4.056523334824522e-07, "loss": 0.3738, "step": 14773 }, { "epoch": 2.14, "grad_norm": 9.016469955444336, "learning_rate": 4.0552504877700454e-07, "loss": 0.3846, "step": 14774 }, { "epoch": 2.14, "grad_norm": 8.12810230255127, "learning_rate": 4.053977789651613e-07, "loss": 0.3745, "step": 14775 }, { "epoch": 2.14, "grad_norm": 8.02023983001709, "learning_rate": 4.0527052405011076e-07, "loss": 0.3182, "step": 14776 }, { "epoch": 2.14, "grad_norm": 9.094527244567871, "learning_rate": 4.0514328403504026e-07, "loss": 0.4388, "step": 14777 }, { "epoch": 2.14, "grad_norm": 8.467201232910156, "learning_rate": 4.0501605892313907e-07, "loss": 0.4067, "step": 14778 }, { "epoch": 2.14, "grad_norm": 7.743472099304199, "learning_rate": 4.048888487175929e-07, "loss": 0.3494, "step": 14779 }, { "epoch": 2.14, "grad_norm": 9.574487686157227, "learning_rate": 4.047616534215904e-07, "loss": 0.3292, "step": 14780 }, { "epoch": 2.14, "grad_norm": 7.57973051071167, "learning_rate": 4.046344730383169e-07, "loss": 0.3381, "step": 14781 }, { "epoch": 2.14, "grad_norm": 7.805491924285889, "learning_rate": 4.0450730757095996e-07, "loss": 0.3369, "step": 14782 }, { "epoch": 2.14, "grad_norm": 8.261754035949707, "learning_rate": 4.0438015702270457e-07, "loss": 0.3447, "step": 14783 }, { "epoch": 2.15, "grad_norm": 8.129940032958984, "learning_rate": 4.0425302139673645e-07, "loss": 0.2839, "step": 14784 }, { "epoch": 2.15, "grad_norm": 9.316108703613281, "learning_rate": 4.04125900696241e-07, "loss": 0.3714, "step": 14785 }, { "epoch": 2.15, "grad_norm": 8.479047775268555, "learning_rate": 4.0399879492440283e-07, "loss": 0.3241, "step": 14786 }, { "epoch": 2.15, "grad_norm": 7.386624336242676, "learning_rate": 4.0387170408440653e-07, "loss": 0.3157, "step": 14787 }, { "epoch": 2.15, "grad_norm": 7.013631820678711, "learning_rate": 4.03744628179436e-07, "loss": 0.331, "step": 14788 }, { "epoch": 2.15, "grad_norm": 8.01855182647705, "learning_rate": 4.03617567212675e-07, "loss": 0.3731, "step": 14789 }, { "epoch": 2.15, "grad_norm": 7.288877964019775, "learning_rate": 4.034905211873069e-07, "loss": 0.3308, "step": 14790 }, { "epoch": 2.15, "grad_norm": 8.376444816589355, "learning_rate": 4.033634901065145e-07, "loss": 0.3293, "step": 14791 }, { "epoch": 2.15, "grad_norm": 7.985535621643066, "learning_rate": 4.032364739734808e-07, "loss": 0.3515, "step": 14792 }, { "epoch": 2.15, "grad_norm": 7.621455192565918, "learning_rate": 4.0310947279138696e-07, "loss": 0.365, "step": 14793 }, { "epoch": 2.15, "grad_norm": 7.178707122802734, "learning_rate": 4.02982486563416e-07, "loss": 0.3483, "step": 14794 }, { "epoch": 2.15, "grad_norm": 8.189874649047852, "learning_rate": 4.0285551529274817e-07, "loss": 0.3654, "step": 14795 }, { "epoch": 2.15, "grad_norm": 9.974471092224121, "learning_rate": 4.0272855898256586e-07, "loss": 0.4169, "step": 14796 }, { "epoch": 2.15, "grad_norm": 9.081611633300781, "learning_rate": 4.026016176360487e-07, "loss": 0.372, "step": 14797 }, { "epoch": 2.15, "grad_norm": 8.821906089782715, "learning_rate": 4.0247469125637734e-07, "loss": 0.3747, "step": 14798 }, { "epoch": 2.15, "grad_norm": 10.807294845581055, "learning_rate": 4.0234777984673176e-07, "loss": 0.3822, "step": 14799 }, { "epoch": 2.15, "grad_norm": 9.269302368164062, "learning_rate": 4.0222088341029104e-07, "loss": 0.3696, "step": 14800 }, { "epoch": 2.15, "grad_norm": 8.026658058166504, "learning_rate": 4.0209400195023547e-07, "loss": 0.3402, "step": 14801 }, { "epoch": 2.15, "grad_norm": 7.836880207061768, "learning_rate": 4.019671354697425e-07, "loss": 0.3342, "step": 14802 }, { "epoch": 2.15, "grad_norm": 8.579882621765137, "learning_rate": 4.01840283971992e-07, "loss": 0.3081, "step": 14803 }, { "epoch": 2.15, "grad_norm": 9.608246803283691, "learning_rate": 4.017134474601609e-07, "loss": 0.361, "step": 14804 }, { "epoch": 2.15, "grad_norm": 8.804094314575195, "learning_rate": 4.015866259374273e-07, "loss": 0.3704, "step": 14805 }, { "epoch": 2.15, "grad_norm": 9.122130393981934, "learning_rate": 4.014598194069684e-07, "loss": 0.3829, "step": 14806 }, { "epoch": 2.15, "grad_norm": 7.72271728515625, "learning_rate": 4.013330278719612e-07, "loss": 0.3278, "step": 14807 }, { "epoch": 2.15, "grad_norm": 9.844870567321777, "learning_rate": 4.0120625133558237e-07, "loss": 0.3717, "step": 14808 }, { "epoch": 2.15, "grad_norm": 7.699187755584717, "learning_rate": 4.010794898010078e-07, "loss": 0.3555, "step": 14809 }, { "epoch": 2.15, "grad_norm": 8.394515991210938, "learning_rate": 4.009527432714136e-07, "loss": 0.4023, "step": 14810 }, { "epoch": 2.15, "grad_norm": 8.245748519897461, "learning_rate": 4.008260117499751e-07, "loss": 0.3184, "step": 14811 }, { "epoch": 2.15, "grad_norm": 6.949766635894775, "learning_rate": 4.0069929523986726e-07, "loss": 0.2952, "step": 14812 }, { "epoch": 2.15, "grad_norm": 7.0797810554504395, "learning_rate": 4.005725937442651e-07, "loss": 0.304, "step": 14813 }, { "epoch": 2.15, "grad_norm": 8.412822723388672, "learning_rate": 4.00445907266342e-07, "loss": 0.3353, "step": 14814 }, { "epoch": 2.15, "grad_norm": 7.977449417114258, "learning_rate": 4.003192358092732e-07, "loss": 0.3311, "step": 14815 }, { "epoch": 2.15, "grad_norm": 8.10057544708252, "learning_rate": 4.0019257937623083e-07, "loss": 0.3534, "step": 14816 }, { "epoch": 2.15, "grad_norm": 10.696388244628906, "learning_rate": 4.000659379703897e-07, "loss": 0.4074, "step": 14817 }, { "epoch": 2.15, "grad_norm": 9.599550247192383, "learning_rate": 3.999393115949211e-07, "loss": 0.369, "step": 14818 }, { "epoch": 2.15, "grad_norm": 7.910160541534424, "learning_rate": 3.998127002529982e-07, "loss": 0.3512, "step": 14819 }, { "epoch": 2.15, "grad_norm": 7.91403865814209, "learning_rate": 3.996861039477929e-07, "loss": 0.2798, "step": 14820 }, { "epoch": 2.15, "grad_norm": 8.017728805541992, "learning_rate": 3.995595226824767e-07, "loss": 0.3164, "step": 14821 }, { "epoch": 2.15, "grad_norm": 8.029473304748535, "learning_rate": 3.994329564602212e-07, "loss": 0.3352, "step": 14822 }, { "epoch": 2.15, "grad_norm": 7.907951831817627, "learning_rate": 3.993064052841971e-07, "loss": 0.2994, "step": 14823 }, { "epoch": 2.15, "grad_norm": 7.110885143280029, "learning_rate": 3.991798691575751e-07, "loss": 0.3146, "step": 14824 }, { "epoch": 2.15, "grad_norm": 9.431952476501465, "learning_rate": 3.990533480835251e-07, "loss": 0.4711, "step": 14825 }, { "epoch": 2.15, "grad_norm": 7.312183380126953, "learning_rate": 3.989268420652171e-07, "loss": 0.3704, "step": 14826 }, { "epoch": 2.15, "grad_norm": 8.982832908630371, "learning_rate": 3.9880035110582036e-07, "loss": 0.3719, "step": 14827 }, { "epoch": 2.15, "grad_norm": 8.000760078430176, "learning_rate": 3.98673875208504e-07, "loss": 0.3449, "step": 14828 }, { "epoch": 2.15, "grad_norm": 7.934587001800537, "learning_rate": 3.985474143764367e-07, "loss": 0.3698, "step": 14829 }, { "epoch": 2.15, "grad_norm": 7.927262306213379, "learning_rate": 3.9842096861278663e-07, "loss": 0.336, "step": 14830 }, { "epoch": 2.15, "grad_norm": 8.309823989868164, "learning_rate": 3.9829453792072174e-07, "loss": 0.3942, "step": 14831 }, { "epoch": 2.15, "grad_norm": 7.888712406158447, "learning_rate": 3.981681223034099e-07, "loss": 0.2997, "step": 14832 }, { "epoch": 2.15, "grad_norm": 7.870940208435059, "learning_rate": 3.980417217640172e-07, "loss": 0.3757, "step": 14833 }, { "epoch": 2.15, "grad_norm": 9.414193153381348, "learning_rate": 3.9791533630571174e-07, "loss": 0.428, "step": 14834 }, { "epoch": 2.15, "grad_norm": 7.814210414886475, "learning_rate": 3.9778896593165855e-07, "loss": 0.3382, "step": 14835 }, { "epoch": 2.15, "grad_norm": 8.431730270385742, "learning_rate": 3.9766261064502503e-07, "loss": 0.3101, "step": 14836 }, { "epoch": 2.15, "grad_norm": 7.916334629058838, "learning_rate": 3.9753627044897545e-07, "loss": 0.3408, "step": 14837 }, { "epoch": 2.15, "grad_norm": 7.948630332946777, "learning_rate": 3.9740994534667637e-07, "loss": 0.3121, "step": 14838 }, { "epoch": 2.15, "grad_norm": 7.134397029876709, "learning_rate": 3.972836353412916e-07, "loss": 0.3227, "step": 14839 }, { "epoch": 2.15, "grad_norm": 7.639434814453125, "learning_rate": 3.9715734043598615e-07, "loss": 0.3292, "step": 14840 }, { "epoch": 2.15, "grad_norm": 9.724577903747559, "learning_rate": 3.97031060633924e-07, "loss": 0.4089, "step": 14841 }, { "epoch": 2.15, "grad_norm": 8.067923545837402, "learning_rate": 3.9690479593826876e-07, "loss": 0.3293, "step": 14842 }, { "epoch": 2.15, "grad_norm": 7.726056098937988, "learning_rate": 3.967785463521841e-07, "loss": 0.3869, "step": 14843 }, { "epoch": 2.15, "grad_norm": 8.462404251098633, "learning_rate": 3.9665231187883275e-07, "loss": 0.3521, "step": 14844 }, { "epoch": 2.15, "grad_norm": 8.437341690063477, "learning_rate": 3.9652609252137734e-07, "loss": 0.3756, "step": 14845 }, { "epoch": 2.15, "grad_norm": 7.56744909286499, "learning_rate": 3.963998882829803e-07, "loss": 0.3447, "step": 14846 }, { "epoch": 2.15, "grad_norm": 9.183960914611816, "learning_rate": 3.962736991668032e-07, "loss": 0.3769, "step": 14847 }, { "epoch": 2.15, "grad_norm": 9.75471305847168, "learning_rate": 3.96147525176008e-07, "loss": 0.4152, "step": 14848 }, { "epoch": 2.15, "grad_norm": 7.5751543045043945, "learning_rate": 3.960213663137546e-07, "loss": 0.2891, "step": 14849 }, { "epoch": 2.15, "grad_norm": 10.013394355773926, "learning_rate": 3.958952225832053e-07, "loss": 0.4111, "step": 14850 }, { "epoch": 2.15, "grad_norm": 8.391775131225586, "learning_rate": 3.95769093987519e-07, "loss": 0.3859, "step": 14851 }, { "epoch": 2.15, "grad_norm": 7.888488292694092, "learning_rate": 3.956429805298569e-07, "loss": 0.3612, "step": 14852 }, { "epoch": 2.16, "grad_norm": 8.392670631408691, "learning_rate": 3.9551688221337755e-07, "loss": 0.4371, "step": 14853 }, { "epoch": 2.16, "grad_norm": 7.924212455749512, "learning_rate": 3.9539079904124016e-07, "loss": 0.3439, "step": 14854 }, { "epoch": 2.16, "grad_norm": 9.05311393737793, "learning_rate": 3.952647310166047e-07, "loss": 0.3688, "step": 14855 }, { "epoch": 2.16, "grad_norm": 8.306036949157715, "learning_rate": 3.951386781426281e-07, "loss": 0.3512, "step": 14856 }, { "epoch": 2.16, "grad_norm": 9.409666061401367, "learning_rate": 3.9501264042246984e-07, "loss": 0.3629, "step": 14857 }, { "epoch": 2.16, "grad_norm": 12.221673011779785, "learning_rate": 3.948866178592861e-07, "loss": 0.4365, "step": 14858 }, { "epoch": 2.16, "grad_norm": 7.747270584106445, "learning_rate": 3.947606104562358e-07, "loss": 0.3645, "step": 14859 }, { "epoch": 2.16, "grad_norm": 9.620150566101074, "learning_rate": 3.9463461821647447e-07, "loss": 0.3723, "step": 14860 }, { "epoch": 2.16, "grad_norm": 8.588447570800781, "learning_rate": 3.945086411431593e-07, "loss": 0.353, "step": 14861 }, { "epoch": 2.16, "grad_norm": 8.970773696899414, "learning_rate": 3.9438267923944625e-07, "loss": 0.4196, "step": 14862 }, { "epoch": 2.16, "grad_norm": 9.392268180847168, "learning_rate": 3.942567325084911e-07, "loss": 0.3984, "step": 14863 }, { "epoch": 2.16, "grad_norm": 8.749469757080078, "learning_rate": 3.941308009534493e-07, "loss": 0.3476, "step": 14864 }, { "epoch": 2.16, "grad_norm": 9.086908340454102, "learning_rate": 3.940048845774758e-07, "loss": 0.3928, "step": 14865 }, { "epoch": 2.16, "grad_norm": 8.604562759399414, "learning_rate": 3.938789833837254e-07, "loss": 0.4088, "step": 14866 }, { "epoch": 2.16, "grad_norm": 8.002981185913086, "learning_rate": 3.9375309737535254e-07, "loss": 0.3606, "step": 14867 }, { "epoch": 2.16, "grad_norm": 8.718210220336914, "learning_rate": 3.9362722655551005e-07, "loss": 0.3732, "step": 14868 }, { "epoch": 2.16, "grad_norm": 8.801575660705566, "learning_rate": 3.935013709273529e-07, "loss": 0.386, "step": 14869 }, { "epoch": 2.16, "grad_norm": 7.488183975219727, "learning_rate": 3.9337553049403274e-07, "loss": 0.3717, "step": 14870 }, { "epoch": 2.16, "grad_norm": 8.624703407287598, "learning_rate": 3.932497052587036e-07, "loss": 0.373, "step": 14871 }, { "epoch": 2.16, "grad_norm": 8.674959182739258, "learning_rate": 3.9312389522451663e-07, "loss": 0.3688, "step": 14872 }, { "epoch": 2.16, "grad_norm": 8.140848159790039, "learning_rate": 3.929981003946251e-07, "loss": 0.3673, "step": 14873 }, { "epoch": 2.16, "grad_norm": 7.99705696105957, "learning_rate": 3.928723207721795e-07, "loss": 0.3082, "step": 14874 }, { "epoch": 2.16, "grad_norm": 8.62099838256836, "learning_rate": 3.927465563603314e-07, "loss": 0.3752, "step": 14875 }, { "epoch": 2.16, "grad_norm": 7.936001777648926, "learning_rate": 3.926208071622317e-07, "loss": 0.2876, "step": 14876 }, { "epoch": 2.16, "grad_norm": 7.62928581237793, "learning_rate": 3.924950731810307e-07, "loss": 0.3176, "step": 14877 }, { "epoch": 2.16, "grad_norm": 8.469779968261719, "learning_rate": 3.923693544198786e-07, "loss": 0.3559, "step": 14878 }, { "epoch": 2.16, "grad_norm": 8.568021774291992, "learning_rate": 3.922436508819251e-07, "loss": 0.3949, "step": 14879 }, { "epoch": 2.16, "grad_norm": 8.185357093811035, "learning_rate": 3.9211796257031937e-07, "loss": 0.3591, "step": 14880 }, { "epoch": 2.16, "grad_norm": 8.296563148498535, "learning_rate": 3.919922894882104e-07, "loss": 0.3536, "step": 14881 }, { "epoch": 2.16, "grad_norm": 7.455447196960449, "learning_rate": 3.918666316387467e-07, "loss": 0.3277, "step": 14882 }, { "epoch": 2.16, "grad_norm": 8.566065788269043, "learning_rate": 3.9174098902507633e-07, "loss": 0.3677, "step": 14883 }, { "epoch": 2.16, "grad_norm": 10.29869556427002, "learning_rate": 3.9161536165034736e-07, "loss": 0.4075, "step": 14884 }, { "epoch": 2.16, "grad_norm": 8.379526138305664, "learning_rate": 3.9148974951770686e-07, "loss": 0.3381, "step": 14885 }, { "epoch": 2.16, "grad_norm": 8.343977928161621, "learning_rate": 3.91364152630302e-07, "loss": 0.3294, "step": 14886 }, { "epoch": 2.16, "grad_norm": 9.428223609924316, "learning_rate": 3.912385709912793e-07, "loss": 0.4386, "step": 14887 }, { "epoch": 2.16, "grad_norm": 8.424712181091309, "learning_rate": 3.9111300460378547e-07, "loss": 0.3534, "step": 14888 }, { "epoch": 2.16, "grad_norm": 8.980992317199707, "learning_rate": 3.909874534709653e-07, "loss": 0.4001, "step": 14889 }, { "epoch": 2.16, "grad_norm": 8.787914276123047, "learning_rate": 3.9086191759596557e-07, "loss": 0.3547, "step": 14890 }, { "epoch": 2.16, "grad_norm": 8.934133529663086, "learning_rate": 3.9073639698193006e-07, "loss": 0.3748, "step": 14891 }, { "epoch": 2.16, "grad_norm": 8.79603385925293, "learning_rate": 3.906108916320049e-07, "loss": 0.3731, "step": 14892 }, { "epoch": 2.16, "grad_norm": 7.591175079345703, "learning_rate": 3.9048540154933304e-07, "loss": 0.3346, "step": 14893 }, { "epoch": 2.16, "grad_norm": 7.871302127838135, "learning_rate": 3.9035992673705975e-07, "loss": 0.3694, "step": 14894 }, { "epoch": 2.16, "grad_norm": 7.335137367248535, "learning_rate": 3.9023446719832755e-07, "loss": 0.351, "step": 14895 }, { "epoch": 2.16, "grad_norm": 8.333569526672363, "learning_rate": 3.9010902293628014e-07, "loss": 0.3808, "step": 14896 }, { "epoch": 2.16, "grad_norm": 8.131329536437988, "learning_rate": 3.8998359395406e-07, "loss": 0.3768, "step": 14897 }, { "epoch": 2.16, "grad_norm": 6.963815689086914, "learning_rate": 3.898581802548099e-07, "loss": 0.2852, "step": 14898 }, { "epoch": 2.16, "grad_norm": 8.556467056274414, "learning_rate": 3.8973278184167156e-07, "loss": 0.352, "step": 14899 }, { "epoch": 2.16, "grad_norm": 8.721505165100098, "learning_rate": 3.8960739871778693e-07, "loss": 0.3452, "step": 14900 }, { "epoch": 2.16, "grad_norm": 8.002073287963867, "learning_rate": 3.894820308862971e-07, "loss": 0.3686, "step": 14901 }, { "epoch": 2.16, "grad_norm": 7.989340782165527, "learning_rate": 3.8935667835034305e-07, "loss": 0.3698, "step": 14902 }, { "epoch": 2.16, "grad_norm": 7.8671345710754395, "learning_rate": 3.8923134111306513e-07, "loss": 0.3295, "step": 14903 }, { "epoch": 2.16, "grad_norm": 8.93535327911377, "learning_rate": 3.89106019177604e-07, "loss": 0.3286, "step": 14904 }, { "epoch": 2.16, "grad_norm": 8.11449146270752, "learning_rate": 3.889807125470982e-07, "loss": 0.3308, "step": 14905 }, { "epoch": 2.16, "grad_norm": 6.941207408905029, "learning_rate": 3.8885542122468816e-07, "loss": 0.3059, "step": 14906 }, { "epoch": 2.16, "grad_norm": 7.060068607330322, "learning_rate": 3.8873014521351266e-07, "loss": 0.3157, "step": 14907 }, { "epoch": 2.16, "grad_norm": 7.9328508377075195, "learning_rate": 3.8860488451671e-07, "loss": 0.3316, "step": 14908 }, { "epoch": 2.16, "grad_norm": 8.406547546386719, "learning_rate": 3.884796391374191e-07, "loss": 0.3518, "step": 14909 }, { "epoch": 2.16, "grad_norm": 7.657895565032959, "learning_rate": 3.8835440907877635e-07, "loss": 0.3387, "step": 14910 }, { "epoch": 2.16, "grad_norm": 7.63125467300415, "learning_rate": 3.8822919434392086e-07, "loss": 0.3011, "step": 14911 }, { "epoch": 2.16, "grad_norm": 9.716094017028809, "learning_rate": 3.881039949359881e-07, "loss": 0.3862, "step": 14912 }, { "epoch": 2.16, "grad_norm": 8.421758651733398, "learning_rate": 3.8797881085811636e-07, "loss": 0.3857, "step": 14913 }, { "epoch": 2.16, "grad_norm": 8.646632194519043, "learning_rate": 3.8785364211344063e-07, "loss": 0.3164, "step": 14914 }, { "epoch": 2.16, "grad_norm": 8.009477615356445, "learning_rate": 3.8772848870509724e-07, "loss": 0.2581, "step": 14915 }, { "epoch": 2.16, "grad_norm": 8.099172592163086, "learning_rate": 3.876033506362217e-07, "loss": 0.3531, "step": 14916 }, { "epoch": 2.16, "grad_norm": 8.02221965789795, "learning_rate": 3.8747822790994934e-07, "loss": 0.3404, "step": 14917 }, { "epoch": 2.16, "grad_norm": 8.582609176635742, "learning_rate": 3.873531205294146e-07, "loss": 0.3779, "step": 14918 }, { "epoch": 2.16, "grad_norm": 8.190552711486816, "learning_rate": 3.8722802849775203e-07, "loss": 0.3718, "step": 14919 }, { "epoch": 2.16, "grad_norm": 7.1603803634643555, "learning_rate": 3.8710295181809547e-07, "loss": 0.3357, "step": 14920 }, { "epoch": 2.16, "grad_norm": 7.438586235046387, "learning_rate": 3.8697789049357876e-07, "loss": 0.3237, "step": 14921 }, { "epoch": 2.17, "grad_norm": 8.514312744140625, "learning_rate": 3.868528445273348e-07, "loss": 0.3505, "step": 14922 }, { "epoch": 2.17, "grad_norm": 10.213671684265137, "learning_rate": 3.86727813922497e-07, "loss": 0.3628, "step": 14923 }, { "epoch": 2.17, "grad_norm": 8.44301986694336, "learning_rate": 3.866027986821966e-07, "loss": 0.3812, "step": 14924 }, { "epoch": 2.17, "grad_norm": 7.694341659545898, "learning_rate": 3.864777988095671e-07, "loss": 0.3201, "step": 14925 }, { "epoch": 2.17, "grad_norm": 9.319975852966309, "learning_rate": 3.8635281430773893e-07, "loss": 0.4024, "step": 14926 }, { "epoch": 2.17, "grad_norm": 8.048054695129395, "learning_rate": 3.862278451798444e-07, "loss": 0.3448, "step": 14927 }, { "epoch": 2.17, "grad_norm": 9.059349060058594, "learning_rate": 3.861028914290134e-07, "loss": 0.3657, "step": 14928 }, { "epoch": 2.17, "grad_norm": 10.582682609558105, "learning_rate": 3.859779530583777e-07, "loss": 0.4161, "step": 14929 }, { "epoch": 2.17, "grad_norm": 6.898043155670166, "learning_rate": 3.858530300710662e-07, "loss": 0.2875, "step": 14930 }, { "epoch": 2.17, "grad_norm": 8.225386619567871, "learning_rate": 3.857281224702089e-07, "loss": 0.2961, "step": 14931 }, { "epoch": 2.17, "grad_norm": 8.869233131408691, "learning_rate": 3.8560323025893604e-07, "loss": 0.3128, "step": 14932 }, { "epoch": 2.17, "grad_norm": 7.658505439758301, "learning_rate": 3.854783534403754e-07, "loss": 0.3191, "step": 14933 }, { "epoch": 2.17, "grad_norm": 8.13048267364502, "learning_rate": 3.853534920176567e-07, "loss": 0.3391, "step": 14934 }, { "epoch": 2.17, "grad_norm": 10.188340187072754, "learning_rate": 3.8522864599390727e-07, "loss": 0.3761, "step": 14935 }, { "epoch": 2.17, "grad_norm": 9.56297492980957, "learning_rate": 3.851038153722551e-07, "loss": 0.3864, "step": 14936 }, { "epoch": 2.17, "grad_norm": 7.838150501251221, "learning_rate": 3.8497900015582783e-07, "loss": 0.3296, "step": 14937 }, { "epoch": 2.17, "grad_norm": 8.09757137298584, "learning_rate": 3.848542003477524e-07, "loss": 0.3911, "step": 14938 }, { "epoch": 2.17, "grad_norm": 8.554248809814453, "learning_rate": 3.847294159511555e-07, "loss": 0.3588, "step": 14939 }, { "epoch": 2.17, "grad_norm": 7.347989559173584, "learning_rate": 3.846046469691633e-07, "loss": 0.3092, "step": 14940 }, { "epoch": 2.17, "grad_norm": 10.384056091308594, "learning_rate": 3.844798934049018e-07, "loss": 0.358, "step": 14941 }, { "epoch": 2.17, "grad_norm": 9.21504020690918, "learning_rate": 3.843551552614964e-07, "loss": 0.3557, "step": 14942 }, { "epoch": 2.17, "grad_norm": 7.943398475646973, "learning_rate": 3.842304325420724e-07, "loss": 0.3654, "step": 14943 }, { "epoch": 2.17, "grad_norm": 8.005048751831055, "learning_rate": 3.841057252497546e-07, "loss": 0.3713, "step": 14944 }, { "epoch": 2.17, "grad_norm": 8.492644309997559, "learning_rate": 3.839810333876665e-07, "loss": 0.3507, "step": 14945 }, { "epoch": 2.17, "grad_norm": 7.984929084777832, "learning_rate": 3.838563569589335e-07, "loss": 0.3491, "step": 14946 }, { "epoch": 2.17, "grad_norm": 9.282724380493164, "learning_rate": 3.837316959666774e-07, "loss": 0.3595, "step": 14947 }, { "epoch": 2.17, "grad_norm": 9.420329093933105, "learning_rate": 3.836070504140232e-07, "loss": 0.3897, "step": 14948 }, { "epoch": 2.17, "grad_norm": 8.620089530944824, "learning_rate": 3.8348242030409206e-07, "loss": 0.3374, "step": 14949 }, { "epoch": 2.17, "grad_norm": 9.048667907714844, "learning_rate": 3.8335780564000787e-07, "loss": 0.3746, "step": 14950 }, { "epoch": 2.17, "grad_norm": 7.977363109588623, "learning_rate": 3.8323320642489153e-07, "loss": 0.3768, "step": 14951 }, { "epoch": 2.17, "grad_norm": 8.614944458007812, "learning_rate": 3.8310862266186516e-07, "loss": 0.3686, "step": 14952 }, { "epoch": 2.17, "grad_norm": 9.439306259155273, "learning_rate": 3.829840543540498e-07, "loss": 0.4226, "step": 14953 }, { "epoch": 2.17, "grad_norm": 7.718566417694092, "learning_rate": 3.828595015045665e-07, "loss": 0.291, "step": 14954 }, { "epoch": 2.17, "grad_norm": 8.1193265914917, "learning_rate": 3.827349641165355e-07, "loss": 0.3703, "step": 14955 }, { "epoch": 2.17, "grad_norm": 8.31970500946045, "learning_rate": 3.8261044219307716e-07, "loss": 0.3407, "step": 14956 }, { "epoch": 2.17, "grad_norm": 6.992304801940918, "learning_rate": 3.82485935737311e-07, "loss": 0.2893, "step": 14957 }, { "epoch": 2.17, "grad_norm": 8.047308921813965, "learning_rate": 3.823614447523563e-07, "loss": 0.3615, "step": 14958 }, { "epoch": 2.17, "grad_norm": 8.59448528289795, "learning_rate": 3.822369692413321e-07, "loss": 0.3633, "step": 14959 }, { "epoch": 2.17, "grad_norm": 8.405940055847168, "learning_rate": 3.821125092073569e-07, "loss": 0.3329, "step": 14960 }, { "epoch": 2.17, "grad_norm": 9.893171310424805, "learning_rate": 3.819880646535488e-07, "loss": 0.3425, "step": 14961 }, { "epoch": 2.17, "grad_norm": 7.576712608337402, "learning_rate": 3.818636355830256e-07, "loss": 0.272, "step": 14962 }, { "epoch": 2.17, "grad_norm": 9.665105819702148, "learning_rate": 3.8173922199890464e-07, "loss": 0.3877, "step": 14963 }, { "epoch": 2.17, "grad_norm": 8.777848243713379, "learning_rate": 3.8161482390430287e-07, "loss": 0.3404, "step": 14964 }, { "epoch": 2.17, "grad_norm": 9.684435844421387, "learning_rate": 3.814904413023372e-07, "loss": 0.4095, "step": 14965 }, { "epoch": 2.17, "grad_norm": 8.02152156829834, "learning_rate": 3.81366074196123e-07, "loss": 0.3404, "step": 14966 }, { "epoch": 2.17, "grad_norm": 7.577722549438477, "learning_rate": 3.812417225887773e-07, "loss": 0.3223, "step": 14967 }, { "epoch": 2.17, "grad_norm": 7.686081409454346, "learning_rate": 3.8111738648341405e-07, "loss": 0.3189, "step": 14968 }, { "epoch": 2.17, "grad_norm": 7.280989170074463, "learning_rate": 3.8099306588314993e-07, "loss": 0.3354, "step": 14969 }, { "epoch": 2.17, "grad_norm": 8.600407600402832, "learning_rate": 3.8086876079109844e-07, "loss": 0.3487, "step": 14970 }, { "epoch": 2.17, "grad_norm": 7.946253299713135, "learning_rate": 3.80744471210374e-07, "loss": 0.3076, "step": 14971 }, { "epoch": 2.17, "grad_norm": 8.911147117614746, "learning_rate": 3.806201971440908e-07, "loss": 0.3579, "step": 14972 }, { "epoch": 2.17, "grad_norm": 7.955097198486328, "learning_rate": 3.8049593859536213e-07, "loss": 0.3451, "step": 14973 }, { "epoch": 2.17, "grad_norm": 8.715786933898926, "learning_rate": 3.803716955673011e-07, "loss": 0.3801, "step": 14974 }, { "epoch": 2.17, "grad_norm": 7.783660888671875, "learning_rate": 3.802474680630204e-07, "loss": 0.3692, "step": 14975 }, { "epoch": 2.17, "grad_norm": 7.629445552825928, "learning_rate": 3.8012325608563243e-07, "loss": 0.3523, "step": 14976 }, { "epoch": 2.17, "grad_norm": 8.140228271484375, "learning_rate": 3.7999905963824916e-07, "loss": 0.3545, "step": 14977 }, { "epoch": 2.17, "grad_norm": 8.399767875671387, "learning_rate": 3.798748787239821e-07, "loss": 0.3661, "step": 14978 }, { "epoch": 2.17, "grad_norm": 7.669859886169434, "learning_rate": 3.7975071334594254e-07, "loss": 0.3768, "step": 14979 }, { "epoch": 2.17, "grad_norm": 8.263276100158691, "learning_rate": 3.7962656350724043e-07, "loss": 0.3065, "step": 14980 }, { "epoch": 2.17, "grad_norm": 9.104514122009277, "learning_rate": 3.7950242921098754e-07, "loss": 0.4152, "step": 14981 }, { "epoch": 2.17, "grad_norm": 8.235025405883789, "learning_rate": 3.793783104602923e-07, "loss": 0.3286, "step": 14982 }, { "epoch": 2.17, "grad_norm": 8.99019718170166, "learning_rate": 3.792542072582654e-07, "loss": 0.3397, "step": 14983 }, { "epoch": 2.17, "grad_norm": 7.796778202056885, "learning_rate": 3.7913011960801575e-07, "loss": 0.3821, "step": 14984 }, { "epoch": 2.17, "grad_norm": 7.666988372802734, "learning_rate": 3.790060475126523e-07, "loss": 0.3074, "step": 14985 }, { "epoch": 2.17, "grad_norm": 7.89490270614624, "learning_rate": 3.788819909752835e-07, "loss": 0.3384, "step": 14986 }, { "epoch": 2.17, "grad_norm": 9.12137222290039, "learning_rate": 3.7875794999901655e-07, "loss": 0.3607, "step": 14987 }, { "epoch": 2.17, "grad_norm": 7.915557861328125, "learning_rate": 3.786339245869605e-07, "loss": 0.3402, "step": 14988 }, { "epoch": 2.17, "grad_norm": 8.717881202697754, "learning_rate": 3.7850991474222117e-07, "loss": 0.4032, "step": 14989 }, { "epoch": 2.17, "grad_norm": 8.339829444885254, "learning_rate": 3.7838592046790673e-07, "loss": 0.335, "step": 14990 }, { "epoch": 2.18, "grad_norm": 7.195242881774902, "learning_rate": 3.782619417671227e-07, "loss": 0.3122, "step": 14991 }, { "epoch": 2.18, "grad_norm": 7.186312675476074, "learning_rate": 3.7813797864297545e-07, "loss": 0.332, "step": 14992 }, { "epoch": 2.18, "grad_norm": 8.436480522155762, "learning_rate": 3.780140310985709e-07, "loss": 0.3914, "step": 14993 }, { "epoch": 2.18, "grad_norm": 8.523393630981445, "learning_rate": 3.77890099137014e-07, "loss": 0.3491, "step": 14994 }, { "epoch": 2.18, "grad_norm": 9.636529922485352, "learning_rate": 3.7776618276140993e-07, "loss": 0.3814, "step": 14995 }, { "epoch": 2.18, "grad_norm": 8.58530330657959, "learning_rate": 3.7764228197486314e-07, "loss": 0.3679, "step": 14996 }, { "epoch": 2.18, "grad_norm": 9.13101577758789, "learning_rate": 3.775183967804777e-07, "loss": 0.3954, "step": 14997 }, { "epoch": 2.18, "grad_norm": 8.516643524169922, "learning_rate": 3.7739452718135746e-07, "loss": 0.3746, "step": 14998 }, { "epoch": 2.18, "grad_norm": 9.87539005279541, "learning_rate": 3.772706731806056e-07, "loss": 0.3651, "step": 14999 }, { "epoch": 2.18, "grad_norm": 8.810487747192383, "learning_rate": 3.771468347813257e-07, "loss": 0.3758, "step": 15000 }, { "epoch": 2.18, "grad_norm": 7.7444634437561035, "learning_rate": 3.77023011986619e-07, "loss": 0.3402, "step": 15001 }, { "epoch": 2.18, "grad_norm": 8.030028343200684, "learning_rate": 3.7689920479958927e-07, "loss": 0.3895, "step": 15002 }, { "epoch": 2.18, "grad_norm": 7.999873161315918, "learning_rate": 3.7677541322333686e-07, "loss": 0.3515, "step": 15003 }, { "epoch": 2.18, "grad_norm": 8.037013053894043, "learning_rate": 3.766516372609644e-07, "loss": 0.301, "step": 15004 }, { "epoch": 2.18, "grad_norm": 9.02839469909668, "learning_rate": 3.7652787691557207e-07, "loss": 0.3285, "step": 15005 }, { "epoch": 2.18, "grad_norm": 8.583746910095215, "learning_rate": 3.7640413219026077e-07, "loss": 0.3374, "step": 15006 }, { "epoch": 2.18, "grad_norm": 7.649093151092529, "learning_rate": 3.762804030881307e-07, "loss": 0.3054, "step": 15007 }, { "epoch": 2.18, "grad_norm": 8.437668800354004, "learning_rate": 3.7615668961228164e-07, "loss": 0.3271, "step": 15008 }, { "epoch": 2.18, "grad_norm": 9.464473724365234, "learning_rate": 3.7603299176581314e-07, "loss": 0.3354, "step": 15009 }, { "epoch": 2.18, "grad_norm": 7.9658050537109375, "learning_rate": 3.759093095518239e-07, "loss": 0.3558, "step": 15010 }, { "epoch": 2.18, "grad_norm": 8.86025333404541, "learning_rate": 3.757856429734134e-07, "loss": 0.3428, "step": 15011 }, { "epoch": 2.18, "grad_norm": 8.550646781921387, "learning_rate": 3.756619920336791e-07, "loss": 0.343, "step": 15012 }, { "epoch": 2.18, "grad_norm": 8.977655410766602, "learning_rate": 3.755383567357192e-07, "loss": 0.3385, "step": 15013 }, { "epoch": 2.18, "grad_norm": 7.845209121704102, "learning_rate": 3.7541473708263106e-07, "loss": 0.361, "step": 15014 }, { "epoch": 2.18, "grad_norm": 8.441176414489746, "learning_rate": 3.7529113307751193e-07, "loss": 0.3285, "step": 15015 }, { "epoch": 2.18, "grad_norm": 8.319127082824707, "learning_rate": 3.7516754472345833e-07, "loss": 0.3506, "step": 15016 }, { "epoch": 2.18, "grad_norm": 7.955469131469727, "learning_rate": 3.7504397202356664e-07, "loss": 0.355, "step": 15017 }, { "epoch": 2.18, "grad_norm": 8.696828842163086, "learning_rate": 3.7492041498093286e-07, "loss": 0.3969, "step": 15018 }, { "epoch": 2.18, "grad_norm": 8.603888511657715, "learning_rate": 3.747968735986523e-07, "loss": 0.3352, "step": 15019 }, { "epoch": 2.18, "grad_norm": 8.333760261535645, "learning_rate": 3.746733478798204e-07, "loss": 0.299, "step": 15020 }, { "epoch": 2.18, "grad_norm": 7.313259124755859, "learning_rate": 3.745498378275319e-07, "loss": 0.2939, "step": 15021 }, { "epoch": 2.18, "grad_norm": 9.172158241271973, "learning_rate": 3.7442634344488047e-07, "loss": 0.3564, "step": 15022 }, { "epoch": 2.18, "grad_norm": 8.329264640808105, "learning_rate": 3.743028647349611e-07, "loss": 0.3462, "step": 15023 }, { "epoch": 2.18, "grad_norm": 7.506739139556885, "learning_rate": 3.7417940170086617e-07, "loss": 0.35, "step": 15024 }, { "epoch": 2.18, "grad_norm": 9.023391723632812, "learning_rate": 3.7405595434569e-07, "loss": 0.3732, "step": 15025 }, { "epoch": 2.18, "grad_norm": 8.562580108642578, "learning_rate": 3.7393252267252474e-07, "loss": 0.3591, "step": 15026 }, { "epoch": 2.18, "grad_norm": 8.38891315460205, "learning_rate": 3.7380910668446273e-07, "loss": 0.3898, "step": 15027 }, { "epoch": 2.18, "grad_norm": 7.290693283081055, "learning_rate": 3.736857063845962e-07, "loss": 0.3324, "step": 15028 }, { "epoch": 2.18, "grad_norm": 7.959177017211914, "learning_rate": 3.7356232177601656e-07, "loss": 0.3351, "step": 15029 }, { "epoch": 2.18, "grad_norm": 7.982916831970215, "learning_rate": 3.7343895286181515e-07, "loss": 0.3241, "step": 15030 }, { "epoch": 2.18, "grad_norm": 9.336576461791992, "learning_rate": 3.7331559964508264e-07, "loss": 0.2995, "step": 15031 }, { "epoch": 2.18, "grad_norm": 8.76516056060791, "learning_rate": 3.7319226212890954e-07, "loss": 0.4057, "step": 15032 }, { "epoch": 2.18, "grad_norm": 8.28185749053955, "learning_rate": 3.7306894031638593e-07, "loss": 0.3352, "step": 15033 }, { "epoch": 2.18, "grad_norm": 7.94182014465332, "learning_rate": 3.729456342106013e-07, "loss": 0.3436, "step": 15034 }, { "epoch": 2.18, "grad_norm": 8.80627727508545, "learning_rate": 3.7282234381464495e-07, "loss": 0.3146, "step": 15035 }, { "epoch": 2.18, "grad_norm": 8.878843307495117, "learning_rate": 3.7269906913160574e-07, "loss": 0.3721, "step": 15036 }, { "epoch": 2.18, "grad_norm": 8.827179908752441, "learning_rate": 3.7257581016457216e-07, "loss": 0.3601, "step": 15037 }, { "epoch": 2.18, "grad_norm": 8.259004592895508, "learning_rate": 3.724525669166321e-07, "loss": 0.3005, "step": 15038 }, { "epoch": 2.18, "grad_norm": 7.6010870933532715, "learning_rate": 3.723293393908734e-07, "loss": 0.3207, "step": 15039 }, { "epoch": 2.18, "grad_norm": 9.325634002685547, "learning_rate": 3.722061275903832e-07, "loss": 0.3917, "step": 15040 }, { "epoch": 2.18, "grad_norm": 7.722661972045898, "learning_rate": 3.7208293151824845e-07, "loss": 0.3494, "step": 15041 }, { "epoch": 2.18, "grad_norm": 8.068892478942871, "learning_rate": 3.7195975117755584e-07, "loss": 0.3724, "step": 15042 }, { "epoch": 2.18, "grad_norm": 8.595808982849121, "learning_rate": 3.718365865713906e-07, "loss": 0.3655, "step": 15043 }, { "epoch": 2.18, "grad_norm": 7.443127632141113, "learning_rate": 3.717134377028397e-07, "loss": 0.3437, "step": 15044 }, { "epoch": 2.18, "grad_norm": 8.62838363647461, "learning_rate": 3.715903045749871e-07, "loss": 0.3861, "step": 15045 }, { "epoch": 2.18, "grad_norm": 9.044958114624023, "learning_rate": 3.7146718719091894e-07, "loss": 0.3563, "step": 15046 }, { "epoch": 2.18, "grad_norm": 8.870977401733398, "learning_rate": 3.7134408555371887e-07, "loss": 0.3944, "step": 15047 }, { "epoch": 2.18, "grad_norm": 8.49268627166748, "learning_rate": 3.7122099966647123e-07, "loss": 0.3895, "step": 15048 }, { "epoch": 2.18, "grad_norm": 8.193933486938477, "learning_rate": 3.710979295322599e-07, "loss": 0.4043, "step": 15049 }, { "epoch": 2.18, "grad_norm": 7.943180561065674, "learning_rate": 3.7097487515416804e-07, "loss": 0.4183, "step": 15050 }, { "epoch": 2.18, "grad_norm": 7.818861484527588, "learning_rate": 3.708518365352786e-07, "loss": 0.2837, "step": 15051 }, { "epoch": 2.18, "grad_norm": 8.284322738647461, "learning_rate": 3.7072881367867416e-07, "loss": 0.3541, "step": 15052 }, { "epoch": 2.18, "grad_norm": 10.287543296813965, "learning_rate": 3.7060580658743677e-07, "loss": 0.4087, "step": 15053 }, { "epoch": 2.18, "grad_norm": 7.785709857940674, "learning_rate": 3.704828152646483e-07, "loss": 0.3026, "step": 15054 }, { "epoch": 2.18, "grad_norm": 9.70348072052002, "learning_rate": 3.7035983971339004e-07, "loss": 0.3605, "step": 15055 }, { "epoch": 2.18, "grad_norm": 8.824485778808594, "learning_rate": 3.7023687993674323e-07, "loss": 0.3525, "step": 15056 }, { "epoch": 2.18, "grad_norm": 8.30159854888916, "learning_rate": 3.701139359377875e-07, "loss": 0.2935, "step": 15057 }, { "epoch": 2.18, "grad_norm": 8.42044448852539, "learning_rate": 3.6999100771960435e-07, "loss": 0.3882, "step": 15058 }, { "epoch": 2.18, "grad_norm": 9.26541519165039, "learning_rate": 3.6986809528527216e-07, "loss": 0.3627, "step": 15059 }, { "epoch": 2.19, "grad_norm": 9.742240905761719, "learning_rate": 3.697451986378717e-07, "loss": 0.4333, "step": 15060 }, { "epoch": 2.19, "grad_norm": 8.30910873413086, "learning_rate": 3.696223177804808e-07, "loss": 0.3127, "step": 15061 }, { "epoch": 2.19, "grad_norm": 8.934746742248535, "learning_rate": 3.694994527161781e-07, "loss": 0.3717, "step": 15062 }, { "epoch": 2.19, "grad_norm": 8.67951488494873, "learning_rate": 3.693766034480429e-07, "loss": 0.3787, "step": 15063 }, { "epoch": 2.19, "grad_norm": 8.240579605102539, "learning_rate": 3.6925376997915157e-07, "loss": 0.3334, "step": 15064 }, { "epoch": 2.19, "grad_norm": 9.19056224822998, "learning_rate": 3.6913095231258283e-07, "loss": 0.4003, "step": 15065 }, { "epoch": 2.19, "grad_norm": 8.533679008483887, "learning_rate": 3.6900815045141245e-07, "loss": 0.389, "step": 15066 }, { "epoch": 2.19, "grad_norm": 9.942426681518555, "learning_rate": 3.688853643987182e-07, "loss": 0.4231, "step": 15067 }, { "epoch": 2.19, "grad_norm": 9.088462829589844, "learning_rate": 3.6876259415757535e-07, "loss": 0.3278, "step": 15068 }, { "epoch": 2.19, "grad_norm": 9.844295501708984, "learning_rate": 3.6863983973106005e-07, "loss": 0.3751, "step": 15069 }, { "epoch": 2.19, "grad_norm": 8.199867248535156, "learning_rate": 3.6851710112224765e-07, "loss": 0.3479, "step": 15070 }, { "epoch": 2.19, "grad_norm": 8.099617958068848, "learning_rate": 3.683943783342134e-07, "loss": 0.2946, "step": 15071 }, { "epoch": 2.19, "grad_norm": 7.967506408691406, "learning_rate": 3.6827167137003156e-07, "loss": 0.3677, "step": 15072 }, { "epoch": 2.19, "grad_norm": 9.659934997558594, "learning_rate": 3.6814898023277665e-07, "loss": 0.4271, "step": 15073 }, { "epoch": 2.19, "grad_norm": 8.194072723388672, "learning_rate": 3.6802630492552244e-07, "loss": 0.3429, "step": 15074 }, { "epoch": 2.19, "grad_norm": 8.24750804901123, "learning_rate": 3.6790364545134224e-07, "loss": 0.3244, "step": 15075 }, { "epoch": 2.19, "grad_norm": 8.581758499145508, "learning_rate": 3.677810018133092e-07, "loss": 0.3912, "step": 15076 }, { "epoch": 2.19, "grad_norm": 8.115221977233887, "learning_rate": 3.6765837401449615e-07, "loss": 0.3459, "step": 15077 }, { "epoch": 2.19, "grad_norm": 8.662901878356934, "learning_rate": 3.6753576205797443e-07, "loss": 0.41, "step": 15078 }, { "epoch": 2.19, "grad_norm": 9.342117309570312, "learning_rate": 3.6741316594681727e-07, "loss": 0.3669, "step": 15079 }, { "epoch": 2.19, "grad_norm": 7.75818395614624, "learning_rate": 3.672905856840948e-07, "loss": 0.3691, "step": 15080 }, { "epoch": 2.19, "grad_norm": 8.650626182556152, "learning_rate": 3.671680212728792e-07, "loss": 0.4133, "step": 15081 }, { "epoch": 2.19, "grad_norm": 10.89421272277832, "learning_rate": 3.670454727162403e-07, "loss": 0.4846, "step": 15082 }, { "epoch": 2.19, "grad_norm": 9.032759666442871, "learning_rate": 3.6692294001724864e-07, "loss": 0.4387, "step": 15083 }, { "epoch": 2.19, "grad_norm": 8.284531593322754, "learning_rate": 3.6680042317897407e-07, "loss": 0.3232, "step": 15084 }, { "epoch": 2.19, "grad_norm": 10.84361743927002, "learning_rate": 3.66677922204486e-07, "loss": 0.4211, "step": 15085 }, { "epoch": 2.19, "grad_norm": 7.780280113220215, "learning_rate": 3.6655543709685365e-07, "loss": 0.2805, "step": 15086 }, { "epoch": 2.19, "grad_norm": 7.519711971282959, "learning_rate": 3.6643296785914557e-07, "loss": 0.3497, "step": 15087 }, { "epoch": 2.19, "grad_norm": 7.781705856323242, "learning_rate": 3.6631051449443e-07, "loss": 0.3609, "step": 15088 }, { "epoch": 2.19, "grad_norm": 8.594027519226074, "learning_rate": 3.661880770057748e-07, "loss": 0.3751, "step": 15089 }, { "epoch": 2.19, "grad_norm": 9.389384269714355, "learning_rate": 3.660656553962476e-07, "loss": 0.4254, "step": 15090 }, { "epoch": 2.19, "grad_norm": 8.774422645568848, "learning_rate": 3.659432496689153e-07, "loss": 0.3639, "step": 15091 }, { "epoch": 2.19, "grad_norm": 7.628072261810303, "learning_rate": 3.6582085982684463e-07, "loss": 0.3712, "step": 15092 }, { "epoch": 2.19, "grad_norm": 8.31910514831543, "learning_rate": 3.6569848587310193e-07, "loss": 0.3848, "step": 15093 }, { "epoch": 2.19, "grad_norm": 7.5996317863464355, "learning_rate": 3.655761278107531e-07, "loss": 0.3249, "step": 15094 }, { "epoch": 2.19, "grad_norm": 8.406494140625, "learning_rate": 3.6545378564286344e-07, "loss": 0.3823, "step": 15095 }, { "epoch": 2.19, "grad_norm": 7.71156644821167, "learning_rate": 3.653314593724985e-07, "loss": 0.3504, "step": 15096 }, { "epoch": 2.19, "grad_norm": 8.180131912231445, "learning_rate": 3.6520914900272206e-07, "loss": 0.365, "step": 15097 }, { "epoch": 2.19, "grad_norm": 8.887953758239746, "learning_rate": 3.6508685453659947e-07, "loss": 0.326, "step": 15098 }, { "epoch": 2.19, "grad_norm": 8.899635314941406, "learning_rate": 3.6496457597719357e-07, "loss": 0.3591, "step": 15099 }, { "epoch": 2.19, "grad_norm": 8.682088851928711, "learning_rate": 3.648423133275691e-07, "loss": 0.3948, "step": 15100 }, { "epoch": 2.19, "grad_norm": 10.988194465637207, "learning_rate": 3.6472006659078767e-07, "loss": 0.3892, "step": 15101 }, { "epoch": 2.19, "grad_norm": 8.95329761505127, "learning_rate": 3.645978357699134e-07, "loss": 0.3874, "step": 15102 }, { "epoch": 2.19, "grad_norm": 9.040864944458008, "learning_rate": 3.6447562086800764e-07, "loss": 0.4147, "step": 15103 }, { "epoch": 2.19, "grad_norm": 8.265141487121582, "learning_rate": 3.6435342188813265e-07, "loss": 0.3242, "step": 15104 }, { "epoch": 2.19, "grad_norm": 7.846938133239746, "learning_rate": 3.6423123883334984e-07, "loss": 0.3787, "step": 15105 }, { "epoch": 2.19, "grad_norm": 8.92137622833252, "learning_rate": 3.641090717067202e-07, "loss": 0.3547, "step": 15106 }, { "epoch": 2.19, "grad_norm": 8.267147064208984, "learning_rate": 3.639869205113046e-07, "loss": 0.3084, "step": 15107 }, { "epoch": 2.19, "grad_norm": 8.628121376037598, "learning_rate": 3.638647852501634e-07, "loss": 0.3162, "step": 15108 }, { "epoch": 2.19, "grad_norm": 7.602623462677002, "learning_rate": 3.637426659263564e-07, "loss": 0.3404, "step": 15109 }, { "epoch": 2.19, "grad_norm": 8.522074699401855, "learning_rate": 3.6362056254294293e-07, "loss": 0.3279, "step": 15110 }, { "epoch": 2.19, "grad_norm": 7.753031253814697, "learning_rate": 3.634984751029824e-07, "loss": 0.3171, "step": 15111 }, { "epoch": 2.19, "grad_norm": 7.4756293296813965, "learning_rate": 3.633764036095336e-07, "loss": 0.2847, "step": 15112 }, { "epoch": 2.19, "grad_norm": 9.79778003692627, "learning_rate": 3.63254348065654e-07, "loss": 0.3692, "step": 15113 }, { "epoch": 2.19, "grad_norm": 8.23486614227295, "learning_rate": 3.6313230847440246e-07, "loss": 0.3093, "step": 15114 }, { "epoch": 2.19, "grad_norm": 7.789710998535156, "learning_rate": 3.630102848388361e-07, "loss": 0.331, "step": 15115 }, { "epoch": 2.19, "grad_norm": 8.511028289794922, "learning_rate": 3.628882771620121e-07, "loss": 0.4029, "step": 15116 }, { "epoch": 2.19, "grad_norm": 8.358258247375488, "learning_rate": 3.627662854469874e-07, "loss": 0.337, "step": 15117 }, { "epoch": 2.19, "grad_norm": 8.523092269897461, "learning_rate": 3.6264430969681736e-07, "loss": 0.3299, "step": 15118 }, { "epoch": 2.19, "grad_norm": 8.302123069763184, "learning_rate": 3.625223499145592e-07, "loss": 0.3551, "step": 15119 }, { "epoch": 2.19, "grad_norm": 8.308181762695312, "learning_rate": 3.6240040610326703e-07, "loss": 0.4033, "step": 15120 }, { "epoch": 2.19, "grad_norm": 8.386314392089844, "learning_rate": 3.6227847826599754e-07, "loss": 0.3434, "step": 15121 }, { "epoch": 2.19, "grad_norm": 7.982327461242676, "learning_rate": 3.6215656640580384e-07, "loss": 0.323, "step": 15122 }, { "epoch": 2.19, "grad_norm": 7.3379058837890625, "learning_rate": 3.6203467052574167e-07, "loss": 0.3574, "step": 15123 }, { "epoch": 2.19, "grad_norm": 8.235333442687988, "learning_rate": 3.6191279062886383e-07, "loss": 0.3309, "step": 15124 }, { "epoch": 2.19, "grad_norm": 7.523271083831787, "learning_rate": 3.617909267182243e-07, "loss": 0.3367, "step": 15125 }, { "epoch": 2.19, "grad_norm": 8.176021575927734, "learning_rate": 3.6166907879687613e-07, "loss": 0.3499, "step": 15126 }, { "epoch": 2.19, "grad_norm": 8.377798080444336, "learning_rate": 3.61547246867872e-07, "loss": 0.3431, "step": 15127 }, { "epoch": 2.2, "grad_norm": 7.342263221740723, "learning_rate": 3.614254309342641e-07, "loss": 0.3259, "step": 15128 }, { "epoch": 2.2, "grad_norm": 8.025751113891602, "learning_rate": 3.6130363099910466e-07, "loss": 0.3415, "step": 15129 }, { "epoch": 2.2, "grad_norm": 9.55744743347168, "learning_rate": 3.611818470654449e-07, "loss": 0.4515, "step": 15130 }, { "epoch": 2.2, "grad_norm": 7.224030494689941, "learning_rate": 3.6106007913633594e-07, "loss": 0.2965, "step": 15131 }, { "epoch": 2.2, "grad_norm": 7.278292179107666, "learning_rate": 3.6093832721482854e-07, "loss": 0.343, "step": 15132 }, { "epoch": 2.2, "grad_norm": 9.324119567871094, "learning_rate": 3.6081659130397333e-07, "loss": 0.4155, "step": 15133 }, { "epoch": 2.2, "grad_norm": 8.274760246276855, "learning_rate": 3.606948714068192e-07, "loss": 0.3637, "step": 15134 }, { "epoch": 2.2, "grad_norm": 8.534012794494629, "learning_rate": 3.605731675264171e-07, "loss": 0.4026, "step": 15135 }, { "epoch": 2.2, "grad_norm": 7.732069969177246, "learning_rate": 3.604514796658145e-07, "loss": 0.3443, "step": 15136 }, { "epoch": 2.2, "grad_norm": 7.460672855377197, "learning_rate": 3.6032980782806157e-07, "loss": 0.3283, "step": 15137 }, { "epoch": 2.2, "grad_norm": 8.3676176071167, "learning_rate": 3.602081520162057e-07, "loss": 0.3848, "step": 15138 }, { "epoch": 2.2, "grad_norm": 9.061223983764648, "learning_rate": 3.60086512233295e-07, "loss": 0.3721, "step": 15139 }, { "epoch": 2.2, "grad_norm": 8.247776985168457, "learning_rate": 3.5996488848237706e-07, "loss": 0.4186, "step": 15140 }, { "epoch": 2.2, "grad_norm": 8.208797454833984, "learning_rate": 3.598432807664984e-07, "loss": 0.3684, "step": 15141 }, { "epoch": 2.2, "grad_norm": 7.290604114532471, "learning_rate": 3.5972168908870694e-07, "loss": 0.3051, "step": 15142 }, { "epoch": 2.2, "grad_norm": 8.529291152954102, "learning_rate": 3.596001134520479e-07, "loss": 0.3908, "step": 15143 }, { "epoch": 2.2, "grad_norm": 8.876028060913086, "learning_rate": 3.594785538595675e-07, "loss": 0.3059, "step": 15144 }, { "epoch": 2.2, "grad_norm": 8.069920539855957, "learning_rate": 3.593570103143112e-07, "loss": 0.3593, "step": 15145 }, { "epoch": 2.2, "grad_norm": 9.077225685119629, "learning_rate": 3.59235482819324e-07, "loss": 0.3642, "step": 15146 }, { "epoch": 2.2, "grad_norm": 8.617587089538574, "learning_rate": 3.5911397137765064e-07, "loss": 0.3761, "step": 15147 }, { "epoch": 2.2, "grad_norm": 8.693145751953125, "learning_rate": 3.589924759923355e-07, "loss": 0.3928, "step": 15148 }, { "epoch": 2.2, "grad_norm": 8.441399574279785, "learning_rate": 3.588709966664223e-07, "loss": 0.349, "step": 15149 }, { "epoch": 2.2, "grad_norm": 7.183554649353027, "learning_rate": 3.5874953340295444e-07, "loss": 0.3063, "step": 15150 }, { "epoch": 2.2, "grad_norm": 7.684637546539307, "learning_rate": 3.586280862049752e-07, "loss": 0.293, "step": 15151 }, { "epoch": 2.2, "grad_norm": 8.679581642150879, "learning_rate": 3.585066550755275e-07, "loss": 0.3423, "step": 15152 }, { "epoch": 2.2, "grad_norm": 7.28770112991333, "learning_rate": 3.583852400176526e-07, "loss": 0.3353, "step": 15153 }, { "epoch": 2.2, "grad_norm": 8.562911033630371, "learning_rate": 3.5826384103439355e-07, "loss": 0.3314, "step": 15154 }, { "epoch": 2.2, "grad_norm": 9.269695281982422, "learning_rate": 3.5814245812879064e-07, "loss": 0.3389, "step": 15155 }, { "epoch": 2.2, "grad_norm": 7.624438762664795, "learning_rate": 3.5802109130388626e-07, "loss": 0.3048, "step": 15156 }, { "epoch": 2.2, "grad_norm": 9.748945236206055, "learning_rate": 3.5789974056271957e-07, "loss": 0.42, "step": 15157 }, { "epoch": 2.2, "grad_norm": 7.803538799285889, "learning_rate": 3.577784059083323e-07, "loss": 0.3471, "step": 15158 }, { "epoch": 2.2, "grad_norm": 8.471871376037598, "learning_rate": 3.576570873437632e-07, "loss": 0.3615, "step": 15159 }, { "epoch": 2.2, "grad_norm": 7.552194595336914, "learning_rate": 3.5753578487205204e-07, "loss": 0.3483, "step": 15160 }, { "epoch": 2.2, "grad_norm": 8.815146446228027, "learning_rate": 3.5741449849623794e-07, "loss": 0.3444, "step": 15161 }, { "epoch": 2.2, "grad_norm": 8.114234924316406, "learning_rate": 3.5729322821935957e-07, "loss": 0.378, "step": 15162 }, { "epoch": 2.2, "grad_norm": 8.821305274963379, "learning_rate": 3.5717197404445496e-07, "loss": 0.381, "step": 15163 }, { "epoch": 2.2, "grad_norm": 8.151784896850586, "learning_rate": 3.570507359745621e-07, "loss": 0.3622, "step": 15164 }, { "epoch": 2.2, "grad_norm": 9.412094116210938, "learning_rate": 3.569295140127184e-07, "loss": 0.3536, "step": 15165 }, { "epoch": 2.2, "grad_norm": 8.810192108154297, "learning_rate": 3.5680830816196083e-07, "loss": 0.3573, "step": 15166 }, { "epoch": 2.2, "grad_norm": 8.627431869506836, "learning_rate": 3.56687118425326e-07, "loss": 0.3459, "step": 15167 }, { "epoch": 2.2, "grad_norm": 9.198417663574219, "learning_rate": 3.565659448058501e-07, "loss": 0.323, "step": 15168 }, { "epoch": 2.2, "grad_norm": 8.355937004089355, "learning_rate": 3.56444787306569e-07, "loss": 0.3792, "step": 15169 }, { "epoch": 2.2, "grad_norm": 8.995370864868164, "learning_rate": 3.563236459305182e-07, "loss": 0.3839, "step": 15170 }, { "epoch": 2.2, "grad_norm": 6.818899631500244, "learning_rate": 3.5620252068073254e-07, "loss": 0.3099, "step": 15171 }, { "epoch": 2.2, "grad_norm": 8.143976211547852, "learning_rate": 3.5608141156024663e-07, "loss": 0.3328, "step": 15172 }, { "epoch": 2.2, "grad_norm": 9.430001258850098, "learning_rate": 3.5596031857209495e-07, "loss": 0.4008, "step": 15173 }, { "epoch": 2.2, "grad_norm": 8.275810241699219, "learning_rate": 3.558392417193105e-07, "loss": 0.4313, "step": 15174 }, { "epoch": 2.2, "grad_norm": 8.73489761352539, "learning_rate": 3.557181810049278e-07, "loss": 0.3485, "step": 15175 }, { "epoch": 2.2, "grad_norm": 8.797393798828125, "learning_rate": 3.555971364319785e-07, "loss": 0.3893, "step": 15176 }, { "epoch": 2.2, "grad_norm": 8.298575401306152, "learning_rate": 3.554761080034967e-07, "loss": 0.3906, "step": 15177 }, { "epoch": 2.2, "grad_norm": 9.783829689025879, "learning_rate": 3.553550957225131e-07, "loss": 0.3935, "step": 15178 }, { "epoch": 2.2, "grad_norm": 9.112181663513184, "learning_rate": 3.5523409959206076e-07, "loss": 0.3738, "step": 15179 }, { "epoch": 2.2, "grad_norm": 9.242255210876465, "learning_rate": 3.5511311961517e-07, "loss": 0.3891, "step": 15180 }, { "epoch": 2.2, "grad_norm": 8.749421119689941, "learning_rate": 3.549921557948723e-07, "loss": 0.3075, "step": 15181 }, { "epoch": 2.2, "grad_norm": 8.03956127166748, "learning_rate": 3.548712081341981e-07, "loss": 0.3532, "step": 15182 }, { "epoch": 2.2, "grad_norm": 6.632665157318115, "learning_rate": 3.547502766361775e-07, "loss": 0.2756, "step": 15183 }, { "epoch": 2.2, "grad_norm": 9.258515357971191, "learning_rate": 3.5462936130384036e-07, "loss": 0.4616, "step": 15184 }, { "epoch": 2.2, "grad_norm": 8.614603042602539, "learning_rate": 3.545084621402159e-07, "loss": 0.3722, "step": 15185 }, { "epoch": 2.2, "grad_norm": 7.485339641571045, "learning_rate": 3.543875791483332e-07, "loss": 0.3103, "step": 15186 }, { "epoch": 2.2, "grad_norm": 8.550379753112793, "learning_rate": 3.5426671233122063e-07, "loss": 0.3369, "step": 15187 }, { "epoch": 2.2, "grad_norm": 8.037796020507812, "learning_rate": 3.5414586169190643e-07, "loss": 0.3615, "step": 15188 }, { "epoch": 2.2, "grad_norm": 7.888881683349609, "learning_rate": 3.540250272334185e-07, "loss": 0.3278, "step": 15189 }, { "epoch": 2.2, "grad_norm": 9.370670318603516, "learning_rate": 3.5390420895878325e-07, "loss": 0.331, "step": 15190 }, { "epoch": 2.2, "grad_norm": 6.9715166091918945, "learning_rate": 3.53783406871029e-07, "loss": 0.3388, "step": 15191 }, { "epoch": 2.2, "grad_norm": 9.044654846191406, "learning_rate": 3.536626209731808e-07, "loss": 0.3804, "step": 15192 }, { "epoch": 2.2, "grad_norm": 8.137214660644531, "learning_rate": 3.5354185126826573e-07, "loss": 0.3902, "step": 15193 }, { "epoch": 2.2, "grad_norm": 7.834975719451904, "learning_rate": 3.534210977593096e-07, "loss": 0.3362, "step": 15194 }, { "epoch": 2.2, "grad_norm": 8.777050018310547, "learning_rate": 3.5330036044933663e-07, "loss": 0.3258, "step": 15195 }, { "epoch": 2.2, "grad_norm": 9.605182647705078, "learning_rate": 3.5317963934137297e-07, "loss": 0.347, "step": 15196 }, { "epoch": 2.21, "grad_norm": 9.32010269165039, "learning_rate": 3.530589344384418e-07, "loss": 0.4499, "step": 15197 }, { "epoch": 2.21, "grad_norm": 10.030902862548828, "learning_rate": 3.529382457435686e-07, "loss": 0.2892, "step": 15198 }, { "epoch": 2.21, "grad_norm": 8.104520797729492, "learning_rate": 3.528175732597758e-07, "loss": 0.3374, "step": 15199 }, { "epoch": 2.21, "grad_norm": 8.575658798217773, "learning_rate": 3.526969169900873e-07, "loss": 0.4233, "step": 15200 }, { "epoch": 2.21, "grad_norm": 8.64067554473877, "learning_rate": 3.5257627693752565e-07, "loss": 0.3422, "step": 15201 }, { "epoch": 2.21, "grad_norm": 8.680113792419434, "learning_rate": 3.524556531051135e-07, "loss": 0.3771, "step": 15202 }, { "epoch": 2.21, "grad_norm": 8.79621410369873, "learning_rate": 3.523350454958729e-07, "loss": 0.3547, "step": 15203 }, { "epoch": 2.21, "grad_norm": 7.476760387420654, "learning_rate": 3.522144541128252e-07, "loss": 0.3096, "step": 15204 }, { "epoch": 2.21, "grad_norm": 8.472417831420898, "learning_rate": 3.52093878958992e-07, "loss": 0.2972, "step": 15205 }, { "epoch": 2.21, "grad_norm": 7.849392414093018, "learning_rate": 3.5197332003739377e-07, "loss": 0.3473, "step": 15206 }, { "epoch": 2.21, "grad_norm": 8.302648544311523, "learning_rate": 3.5185277735105125e-07, "loss": 0.3847, "step": 15207 }, { "epoch": 2.21, "grad_norm": 8.403955459594727, "learning_rate": 3.517322509029844e-07, "loss": 0.3396, "step": 15208 }, { "epoch": 2.21, "grad_norm": 9.497515678405762, "learning_rate": 3.5161174069621214e-07, "loss": 0.3941, "step": 15209 }, { "epoch": 2.21, "grad_norm": 8.798373222351074, "learning_rate": 3.5149124673375484e-07, "loss": 0.3278, "step": 15210 }, { "epoch": 2.21, "grad_norm": 8.110628128051758, "learning_rate": 3.5137076901862995e-07, "loss": 0.3254, "step": 15211 }, { "epoch": 2.21, "grad_norm": 10.743085861206055, "learning_rate": 3.512503075538572e-07, "loss": 0.4046, "step": 15212 }, { "epoch": 2.21, "grad_norm": 7.621855735778809, "learning_rate": 3.511298623424532e-07, "loss": 0.3477, "step": 15213 }, { "epoch": 2.21, "grad_norm": 9.356367111206055, "learning_rate": 3.510094333874369e-07, "loss": 0.4212, "step": 15214 }, { "epoch": 2.21, "grad_norm": 9.744959831237793, "learning_rate": 3.5088902069182446e-07, "loss": 0.3838, "step": 15215 }, { "epoch": 2.21, "grad_norm": 6.9938764572143555, "learning_rate": 3.507686242586329e-07, "loss": 0.3422, "step": 15216 }, { "epoch": 2.21, "grad_norm": 8.425397872924805, "learning_rate": 3.5064824409087845e-07, "loss": 0.3848, "step": 15217 }, { "epoch": 2.21, "grad_norm": 8.559004783630371, "learning_rate": 3.5052788019157687e-07, "loss": 0.3413, "step": 15218 }, { "epoch": 2.21, "grad_norm": 9.498883247375488, "learning_rate": 3.5040753256374465e-07, "loss": 0.3605, "step": 15219 }, { "epoch": 2.21, "grad_norm": 8.677331924438477, "learning_rate": 3.502872012103959e-07, "loss": 0.317, "step": 15220 }, { "epoch": 2.21, "grad_norm": 9.526597023010254, "learning_rate": 3.501668861345457e-07, "loss": 0.3725, "step": 15221 }, { "epoch": 2.21, "grad_norm": 9.051648139953613, "learning_rate": 3.500465873392082e-07, "loss": 0.3607, "step": 15222 }, { "epoch": 2.21, "grad_norm": 9.320276260375977, "learning_rate": 3.499263048273975e-07, "loss": 0.3629, "step": 15223 }, { "epoch": 2.21, "grad_norm": 7.790156364440918, "learning_rate": 3.49806038602127e-07, "loss": 0.3878, "step": 15224 }, { "epoch": 2.21, "grad_norm": 7.919057846069336, "learning_rate": 3.4968578866640965e-07, "loss": 0.3632, "step": 15225 }, { "epoch": 2.21, "grad_norm": 8.836416244506836, "learning_rate": 3.4956555502325825e-07, "loss": 0.3957, "step": 15226 }, { "epoch": 2.21, "grad_norm": 7.496901988983154, "learning_rate": 3.49445337675685e-07, "loss": 0.3248, "step": 15227 }, { "epoch": 2.21, "grad_norm": 7.682130813598633, "learning_rate": 3.4932513662670173e-07, "loss": 0.2928, "step": 15228 }, { "epoch": 2.21, "grad_norm": 8.746747016906738, "learning_rate": 3.492049518793203e-07, "loss": 0.3642, "step": 15229 }, { "epoch": 2.21, "grad_norm": 8.346254348754883, "learning_rate": 3.490847834365508e-07, "loss": 0.3879, "step": 15230 }, { "epoch": 2.21, "grad_norm": 9.611141204833984, "learning_rate": 3.4896463130140496e-07, "loss": 0.4012, "step": 15231 }, { "epoch": 2.21, "grad_norm": 8.374152183532715, "learning_rate": 3.488444954768919e-07, "loss": 0.3893, "step": 15232 }, { "epoch": 2.21, "grad_norm": 8.583441734313965, "learning_rate": 3.4872437596602255e-07, "loss": 0.3415, "step": 15233 }, { "epoch": 2.21, "grad_norm": 9.192569732666016, "learning_rate": 3.486042727718055e-07, "loss": 0.3851, "step": 15234 }, { "epoch": 2.21, "grad_norm": 8.140419960021973, "learning_rate": 3.484841858972499e-07, "loss": 0.3485, "step": 15235 }, { "epoch": 2.21, "grad_norm": 8.679893493652344, "learning_rate": 3.483641153453646e-07, "loss": 0.3387, "step": 15236 }, { "epoch": 2.21, "grad_norm": 7.771631240844727, "learning_rate": 3.482440611191574e-07, "loss": 0.3354, "step": 15237 }, { "epoch": 2.21, "grad_norm": 7.858471393585205, "learning_rate": 3.4812402322163634e-07, "loss": 0.2927, "step": 15238 }, { "epoch": 2.21, "grad_norm": 8.894145011901855, "learning_rate": 3.480040016558087e-07, "loss": 0.3771, "step": 15239 }, { "epoch": 2.21, "grad_norm": 7.294279098510742, "learning_rate": 3.4788399642468136e-07, "loss": 0.3377, "step": 15240 }, { "epoch": 2.21, "grad_norm": 7.962400913238525, "learning_rate": 3.47764007531261e-07, "loss": 0.3127, "step": 15241 }, { "epoch": 2.21, "grad_norm": 8.092269897460938, "learning_rate": 3.4764403497855353e-07, "loss": 0.3579, "step": 15242 }, { "epoch": 2.21, "grad_norm": 9.333806991577148, "learning_rate": 3.475240787695652e-07, "loss": 0.4057, "step": 15243 }, { "epoch": 2.21, "grad_norm": 9.523303031921387, "learning_rate": 3.4740413890730027e-07, "loss": 0.4063, "step": 15244 }, { "epoch": 2.21, "grad_norm": 8.64022159576416, "learning_rate": 3.472842153947646e-07, "loss": 0.3529, "step": 15245 }, { "epoch": 2.21, "grad_norm": 8.588862419128418, "learning_rate": 3.4716430823496234e-07, "loss": 0.3272, "step": 15246 }, { "epoch": 2.21, "grad_norm": 10.164628982543945, "learning_rate": 3.470444174308976e-07, "loss": 0.3946, "step": 15247 }, { "epoch": 2.21, "grad_norm": 7.5523176193237305, "learning_rate": 3.469245429855742e-07, "loss": 0.321, "step": 15248 }, { "epoch": 2.21, "grad_norm": 8.116032600402832, "learning_rate": 3.46804684901995e-07, "loss": 0.3156, "step": 15249 }, { "epoch": 2.21, "grad_norm": 9.511933326721191, "learning_rate": 3.466848431831635e-07, "loss": 0.3871, "step": 15250 }, { "epoch": 2.21, "grad_norm": 8.712475776672363, "learning_rate": 3.4656501783208116e-07, "loss": 0.3628, "step": 15251 }, { "epoch": 2.21, "grad_norm": 9.555874824523926, "learning_rate": 3.464452088517511e-07, "loss": 0.4269, "step": 15252 }, { "epoch": 2.21, "grad_norm": 7.5715107917785645, "learning_rate": 3.4632541624517384e-07, "loss": 0.3274, "step": 15253 }, { "epoch": 2.21, "grad_norm": 9.523427963256836, "learning_rate": 3.4620564001535177e-07, "loss": 0.3645, "step": 15254 }, { "epoch": 2.21, "grad_norm": 7.765782833099365, "learning_rate": 3.4608588016528486e-07, "loss": 0.3085, "step": 15255 }, { "epoch": 2.21, "grad_norm": 9.416666030883789, "learning_rate": 3.459661366979736e-07, "loss": 0.3545, "step": 15256 }, { "epoch": 2.21, "grad_norm": 8.57921314239502, "learning_rate": 3.458464096164181e-07, "loss": 0.3608, "step": 15257 }, { "epoch": 2.21, "grad_norm": 7.754101276397705, "learning_rate": 3.4572669892361807e-07, "loss": 0.3412, "step": 15258 }, { "epoch": 2.21, "grad_norm": 7.284878253936768, "learning_rate": 3.456070046225723e-07, "loss": 0.3055, "step": 15259 }, { "epoch": 2.21, "grad_norm": 8.956415176391602, "learning_rate": 3.454873267162799e-07, "loss": 0.3333, "step": 15260 }, { "epoch": 2.21, "grad_norm": 8.319194793701172, "learning_rate": 3.45367665207739e-07, "loss": 0.4269, "step": 15261 }, { "epoch": 2.21, "grad_norm": 9.662677764892578, "learning_rate": 3.452480200999476e-07, "loss": 0.3757, "step": 15262 }, { "epoch": 2.21, "grad_norm": 7.620700836181641, "learning_rate": 3.451283913959032e-07, "loss": 0.3315, "step": 15263 }, { "epoch": 2.21, "grad_norm": 7.248476505279541, "learning_rate": 3.4500877909860327e-07, "loss": 0.2985, "step": 15264 }, { "epoch": 2.21, "grad_norm": 8.23336410522461, "learning_rate": 3.448891832110434e-07, "loss": 0.2857, "step": 15265 }, { "epoch": 2.22, "grad_norm": 7.928717136383057, "learning_rate": 3.447696037362213e-07, "loss": 0.3507, "step": 15266 }, { "epoch": 2.22, "grad_norm": 7.88926887512207, "learning_rate": 3.446500406771314e-07, "loss": 0.3465, "step": 15267 }, { "epoch": 2.22, "grad_norm": 8.723979949951172, "learning_rate": 3.4453049403677073e-07, "loss": 0.3777, "step": 15268 }, { "epoch": 2.22, "grad_norm": 8.611713409423828, "learning_rate": 3.444109638181327e-07, "loss": 0.3268, "step": 15269 }, { "epoch": 2.22, "grad_norm": 7.589775562286377, "learning_rate": 3.4429145002421343e-07, "loss": 0.3452, "step": 15270 }, { "epoch": 2.22, "grad_norm": 8.951315879821777, "learning_rate": 3.4417195265800625e-07, "loss": 0.3668, "step": 15271 }, { "epoch": 2.22, "grad_norm": 9.136629104614258, "learning_rate": 3.4405247172250473e-07, "loss": 0.3651, "step": 15272 }, { "epoch": 2.22, "grad_norm": 9.56664752960205, "learning_rate": 3.439330072207034e-07, "loss": 0.3542, "step": 15273 }, { "epoch": 2.22, "grad_norm": 8.906696319580078, "learning_rate": 3.4381355915559397e-07, "loss": 0.3151, "step": 15274 }, { "epoch": 2.22, "grad_norm": 7.858875274658203, "learning_rate": 3.436941275301702e-07, "loss": 0.3534, "step": 15275 }, { "epoch": 2.22, "grad_norm": 8.182201385498047, "learning_rate": 3.435747123474234e-07, "loss": 0.3288, "step": 15276 }, { "epoch": 2.22, "grad_norm": 8.607396125793457, "learning_rate": 3.4345531361034566e-07, "loss": 0.3311, "step": 15277 }, { "epoch": 2.22, "grad_norm": 8.462668418884277, "learning_rate": 3.433359313219283e-07, "loss": 0.3671, "step": 15278 }, { "epoch": 2.22, "grad_norm": 9.321447372436523, "learning_rate": 3.432165654851621e-07, "loss": 0.4073, "step": 15279 }, { "epoch": 2.22, "grad_norm": 7.873934268951416, "learning_rate": 3.4309721610303765e-07, "loss": 0.336, "step": 15280 }, { "epoch": 2.22, "grad_norm": 9.136046409606934, "learning_rate": 3.4297788317854514e-07, "loss": 0.3784, "step": 15281 }, { "epoch": 2.22, "grad_norm": 9.199450492858887, "learning_rate": 3.428585667146743e-07, "loss": 0.3619, "step": 15282 }, { "epoch": 2.22, "grad_norm": 8.475021362304688, "learning_rate": 3.427392667144143e-07, "loss": 0.4057, "step": 15283 }, { "epoch": 2.22, "grad_norm": 8.177133560180664, "learning_rate": 3.4261998318075405e-07, "loss": 0.3562, "step": 15284 }, { "epoch": 2.22, "grad_norm": 9.066023826599121, "learning_rate": 3.425007161166822e-07, "loss": 0.3672, "step": 15285 }, { "epoch": 2.22, "grad_norm": 8.929998397827148, "learning_rate": 3.42381465525186e-07, "loss": 0.3669, "step": 15286 }, { "epoch": 2.22, "grad_norm": 8.218194961547852, "learning_rate": 3.422622314092544e-07, "loss": 0.4016, "step": 15287 }, { "epoch": 2.22, "grad_norm": 9.047908782958984, "learning_rate": 3.4214301377187316e-07, "loss": 0.3604, "step": 15288 }, { "epoch": 2.22, "grad_norm": 6.637589454650879, "learning_rate": 3.4202381261603044e-07, "loss": 0.3266, "step": 15289 }, { "epoch": 2.22, "grad_norm": 8.92186450958252, "learning_rate": 3.4190462794471164e-07, "loss": 0.3648, "step": 15290 }, { "epoch": 2.22, "grad_norm": 7.871857166290283, "learning_rate": 3.4178545976090313e-07, "loss": 0.3539, "step": 15291 }, { "epoch": 2.22, "grad_norm": 8.076350212097168, "learning_rate": 3.4166630806759044e-07, "loss": 0.3733, "step": 15292 }, { "epoch": 2.22, "grad_norm": 7.664570331573486, "learning_rate": 3.415471728677587e-07, "loss": 0.3726, "step": 15293 }, { "epoch": 2.22, "grad_norm": 7.805140972137451, "learning_rate": 3.4142805416439267e-07, "loss": 0.32, "step": 15294 }, { "epoch": 2.22, "grad_norm": 8.120443344116211, "learning_rate": 3.413089519604768e-07, "loss": 0.331, "step": 15295 }, { "epoch": 2.22, "grad_norm": 8.347434997558594, "learning_rate": 3.411898662589947e-07, "loss": 0.3501, "step": 15296 }, { "epoch": 2.22, "grad_norm": 9.323261260986328, "learning_rate": 3.4107079706293016e-07, "loss": 0.3756, "step": 15297 }, { "epoch": 2.22, "grad_norm": 8.149177551269531, "learning_rate": 3.409517443752661e-07, "loss": 0.3732, "step": 15298 }, { "epoch": 2.22, "grad_norm": 7.805567264556885, "learning_rate": 3.4083270819898534e-07, "loss": 0.3968, "step": 15299 }, { "epoch": 2.22, "grad_norm": 8.341535568237305, "learning_rate": 3.4071368853706994e-07, "loss": 0.3389, "step": 15300 }, { "epoch": 2.22, "grad_norm": 8.653974533081055, "learning_rate": 3.405946853925018e-07, "loss": 0.4218, "step": 15301 }, { "epoch": 2.22, "grad_norm": 7.506526947021484, "learning_rate": 3.4047569876826254e-07, "loss": 0.347, "step": 15302 }, { "epoch": 2.22, "grad_norm": 8.387025833129883, "learning_rate": 3.403567286673329e-07, "loss": 0.3679, "step": 15303 }, { "epoch": 2.22, "grad_norm": 8.309062004089355, "learning_rate": 3.402377750926937e-07, "loss": 0.3711, "step": 15304 }, { "epoch": 2.22, "grad_norm": 8.512500762939453, "learning_rate": 3.4011883804732497e-07, "loss": 0.3603, "step": 15305 }, { "epoch": 2.22, "grad_norm": 7.754251480102539, "learning_rate": 3.39999917534207e-07, "loss": 0.3469, "step": 15306 }, { "epoch": 2.22, "grad_norm": 8.544041633605957, "learning_rate": 3.39881013556318e-07, "loss": 0.4105, "step": 15307 }, { "epoch": 2.22, "grad_norm": 8.426787376403809, "learning_rate": 3.3976212611663824e-07, "loss": 0.3644, "step": 15308 }, { "epoch": 2.22, "grad_norm": 8.721688270568848, "learning_rate": 3.3964325521814507e-07, "loss": 0.3122, "step": 15309 }, { "epoch": 2.22, "grad_norm": 8.743998527526855, "learning_rate": 3.3952440086381774e-07, "loss": 0.3775, "step": 15310 }, { "epoch": 2.22, "grad_norm": 8.765339851379395, "learning_rate": 3.3940556305663314e-07, "loss": 0.3292, "step": 15311 }, { "epoch": 2.22, "grad_norm": 7.766089916229248, "learning_rate": 3.392867417995687e-07, "loss": 0.3488, "step": 15312 }, { "epoch": 2.22, "grad_norm": 7.5584259033203125, "learning_rate": 3.3916793709560145e-07, "loss": 0.3484, "step": 15313 }, { "epoch": 2.22, "grad_norm": 7.544613838195801, "learning_rate": 3.390491489477079e-07, "loss": 0.3094, "step": 15314 }, { "epoch": 2.22, "grad_norm": 8.068974494934082, "learning_rate": 3.3893037735886374e-07, "loss": 0.3817, "step": 15315 }, { "epoch": 2.22, "grad_norm": 9.841010093688965, "learning_rate": 3.388116223320451e-07, "loss": 0.4567, "step": 15316 }, { "epoch": 2.22, "grad_norm": 7.97529411315918, "learning_rate": 3.386928838702268e-07, "loss": 0.2779, "step": 15317 }, { "epoch": 2.22, "grad_norm": 8.780543327331543, "learning_rate": 3.385741619763839e-07, "loss": 0.3576, "step": 15318 }, { "epoch": 2.22, "grad_norm": 8.097854614257812, "learning_rate": 3.3845545665349053e-07, "loss": 0.3546, "step": 15319 }, { "epoch": 2.22, "grad_norm": 8.608457565307617, "learning_rate": 3.3833676790452115e-07, "loss": 0.3895, "step": 15320 }, { "epoch": 2.22, "grad_norm": 9.177552223205566, "learning_rate": 3.3821809573244844e-07, "loss": 0.4026, "step": 15321 }, { "epoch": 2.22, "grad_norm": 8.352093696594238, "learning_rate": 3.380994401402466e-07, "loss": 0.3855, "step": 15322 }, { "epoch": 2.22, "grad_norm": 7.994204044342041, "learning_rate": 3.3798080113088714e-07, "loss": 0.3592, "step": 15323 }, { "epoch": 2.22, "grad_norm": 7.748283386230469, "learning_rate": 3.3786217870734334e-07, "loss": 0.3372, "step": 15324 }, { "epoch": 2.22, "grad_norm": 7.807862758636475, "learning_rate": 3.3774357287258724e-07, "loss": 0.3496, "step": 15325 }, { "epoch": 2.22, "grad_norm": 8.362001419067383, "learning_rate": 3.3762498362958916e-07, "loss": 0.3709, "step": 15326 }, { "epoch": 2.22, "grad_norm": 7.848312854766846, "learning_rate": 3.375064109813215e-07, "loss": 0.3562, "step": 15327 }, { "epoch": 2.22, "grad_norm": 7.713821887969971, "learning_rate": 3.373878549307536e-07, "loss": 0.319, "step": 15328 }, { "epoch": 2.22, "grad_norm": 8.729860305786133, "learning_rate": 3.372693154808571e-07, "loss": 0.3356, "step": 15329 }, { "epoch": 2.22, "grad_norm": 8.204665184020996, "learning_rate": 3.3715079263460035e-07, "loss": 0.2778, "step": 15330 }, { "epoch": 2.22, "grad_norm": 8.780458450317383, "learning_rate": 3.370322863949542e-07, "loss": 0.3934, "step": 15331 }, { "epoch": 2.22, "grad_norm": 7.914637088775635, "learning_rate": 3.369137967648865e-07, "loss": 0.3687, "step": 15332 }, { "epoch": 2.22, "grad_norm": 7.6881842613220215, "learning_rate": 3.367953237473663e-07, "loss": 0.2932, "step": 15333 }, { "epoch": 2.22, "grad_norm": 8.027851104736328, "learning_rate": 3.3667686734536163e-07, "loss": 0.3935, "step": 15334 }, { "epoch": 2.23, "grad_norm": 9.134344100952148, "learning_rate": 3.365584275618403e-07, "loss": 0.3368, "step": 15335 }, { "epoch": 2.23, "grad_norm": 8.017603874206543, "learning_rate": 3.364400043997695e-07, "loss": 0.4053, "step": 15336 }, { "epoch": 2.23, "grad_norm": 8.382822036743164, "learning_rate": 3.3632159786211623e-07, "loss": 0.3371, "step": 15337 }, { "epoch": 2.23, "grad_norm": 7.762008190155029, "learning_rate": 3.3620320795184697e-07, "loss": 0.3183, "step": 15338 }, { "epoch": 2.23, "grad_norm": 8.29802131652832, "learning_rate": 3.360848346719278e-07, "loss": 0.3066, "step": 15339 }, { "epoch": 2.23, "grad_norm": 8.209914207458496, "learning_rate": 3.359664780253244e-07, "loss": 0.3532, "step": 15340 }, { "epoch": 2.23, "grad_norm": 9.521585464477539, "learning_rate": 3.358481380150022e-07, "loss": 0.3318, "step": 15341 }, { "epoch": 2.23, "grad_norm": 8.042231559753418, "learning_rate": 3.3572981464392516e-07, "loss": 0.3207, "step": 15342 }, { "epoch": 2.23, "grad_norm": 7.904848575592041, "learning_rate": 3.35611507915059e-07, "loss": 0.353, "step": 15343 }, { "epoch": 2.23, "grad_norm": 9.033432006835938, "learning_rate": 3.354932178313662e-07, "loss": 0.3624, "step": 15344 }, { "epoch": 2.23, "grad_norm": 9.07819938659668, "learning_rate": 3.3537494439581183e-07, "loss": 0.3286, "step": 15345 }, { "epoch": 2.23, "grad_norm": 8.77668571472168, "learning_rate": 3.3525668761135805e-07, "loss": 0.36, "step": 15346 }, { "epoch": 2.23, "grad_norm": 10.555245399475098, "learning_rate": 3.3513844748096775e-07, "loss": 0.3804, "step": 15347 }, { "epoch": 2.23, "grad_norm": 7.836195468902588, "learning_rate": 3.3502022400760345e-07, "loss": 0.3356, "step": 15348 }, { "epoch": 2.23, "grad_norm": 7.740089416503906, "learning_rate": 3.3490201719422664e-07, "loss": 0.3467, "step": 15349 }, { "epoch": 2.23, "grad_norm": 8.22524642944336, "learning_rate": 3.347838270437997e-07, "loss": 0.3751, "step": 15350 }, { "epoch": 2.23, "grad_norm": 8.80829906463623, "learning_rate": 3.3466565355928254e-07, "loss": 0.318, "step": 15351 }, { "epoch": 2.23, "grad_norm": 8.587067604064941, "learning_rate": 3.34547496743637e-07, "loss": 0.356, "step": 15352 }, { "epoch": 2.23, "grad_norm": 8.357341766357422, "learning_rate": 3.344293565998223e-07, "loss": 0.2902, "step": 15353 }, { "epoch": 2.23, "grad_norm": 8.156355857849121, "learning_rate": 3.343112331307988e-07, "loss": 0.3915, "step": 15354 }, { "epoch": 2.23, "grad_norm": 8.710597038269043, "learning_rate": 3.3419312633952555e-07, "loss": 0.376, "step": 15355 }, { "epoch": 2.23, "grad_norm": 6.888816833496094, "learning_rate": 3.340750362289617e-07, "loss": 0.359, "step": 15356 }, { "epoch": 2.23, "grad_norm": 7.9879279136657715, "learning_rate": 3.3395696280206587e-07, "loss": 0.3431, "step": 15357 }, { "epoch": 2.23, "grad_norm": 8.788811683654785, "learning_rate": 3.338389060617961e-07, "loss": 0.3926, "step": 15358 }, { "epoch": 2.23, "grad_norm": 9.744380950927734, "learning_rate": 3.337208660111102e-07, "loss": 0.3806, "step": 15359 }, { "epoch": 2.23, "grad_norm": 9.067197799682617, "learning_rate": 3.336028426529653e-07, "loss": 0.3711, "step": 15360 }, { "epoch": 2.23, "grad_norm": 8.40982437133789, "learning_rate": 3.3348483599031853e-07, "loss": 0.3578, "step": 15361 }, { "epoch": 2.23, "grad_norm": 9.610661506652832, "learning_rate": 3.3336684602612654e-07, "loss": 0.3412, "step": 15362 }, { "epoch": 2.23, "grad_norm": 7.693116664886475, "learning_rate": 3.332488727633443e-07, "loss": 0.3049, "step": 15363 }, { "epoch": 2.23, "grad_norm": 8.223438262939453, "learning_rate": 3.3313091620492893e-07, "loss": 0.277, "step": 15364 }, { "epoch": 2.23, "grad_norm": 8.040536880493164, "learning_rate": 3.330129763538342e-07, "loss": 0.3414, "step": 15365 }, { "epoch": 2.23, "grad_norm": 9.670896530151367, "learning_rate": 3.3289505321301626e-07, "loss": 0.4318, "step": 15366 }, { "epoch": 2.23, "grad_norm": 8.876534461975098, "learning_rate": 3.327771467854286e-07, "loss": 0.4127, "step": 15367 }, { "epoch": 2.23, "grad_norm": 8.91294002532959, "learning_rate": 3.326592570740253e-07, "loss": 0.4165, "step": 15368 }, { "epoch": 2.23, "grad_norm": 8.097453117370605, "learning_rate": 3.3254138408176004e-07, "loss": 0.3243, "step": 15369 }, { "epoch": 2.23, "grad_norm": 8.770240783691406, "learning_rate": 3.324235278115859e-07, "loss": 0.4027, "step": 15370 }, { "epoch": 2.23, "grad_norm": 7.804079532623291, "learning_rate": 3.3230568826645546e-07, "loss": 0.3605, "step": 15371 }, { "epoch": 2.23, "grad_norm": 8.044339179992676, "learning_rate": 3.321878654493212e-07, "loss": 0.3118, "step": 15372 }, { "epoch": 2.23, "grad_norm": 7.867913246154785, "learning_rate": 3.320700593631349e-07, "loss": 0.33, "step": 15373 }, { "epoch": 2.23, "grad_norm": 8.924885749816895, "learning_rate": 3.3195227001084804e-07, "loss": 0.3749, "step": 15374 }, { "epoch": 2.23, "grad_norm": 10.838892936706543, "learning_rate": 3.318344973954116e-07, "loss": 0.4033, "step": 15375 }, { "epoch": 2.23, "grad_norm": 7.7259297370910645, "learning_rate": 3.3171674151977604e-07, "loss": 0.3193, "step": 15376 }, { "epoch": 2.23, "grad_norm": 7.726150035858154, "learning_rate": 3.3159900238689173e-07, "loss": 0.3472, "step": 15377 }, { "epoch": 2.23, "grad_norm": 7.2824249267578125, "learning_rate": 3.3148127999970845e-07, "loss": 0.3349, "step": 15378 }, { "epoch": 2.23, "grad_norm": 7.551305770874023, "learning_rate": 3.3136357436117534e-07, "loss": 0.3452, "step": 15379 }, { "epoch": 2.23, "grad_norm": 8.25271224975586, "learning_rate": 3.312458854742416e-07, "loss": 0.3526, "step": 15380 }, { "epoch": 2.23, "grad_norm": 9.268769264221191, "learning_rate": 3.3112821334185583e-07, "loss": 0.3767, "step": 15381 }, { "epoch": 2.23, "grad_norm": 7.870671272277832, "learning_rate": 3.310105579669652e-07, "loss": 0.3313, "step": 15382 }, { "epoch": 2.23, "grad_norm": 7.892087459564209, "learning_rate": 3.308929193525187e-07, "loss": 0.3699, "step": 15383 }, { "epoch": 2.23, "grad_norm": 8.322522163391113, "learning_rate": 3.307752975014624e-07, "loss": 0.3382, "step": 15384 }, { "epoch": 2.23, "grad_norm": 8.584917068481445, "learning_rate": 3.3065769241674414e-07, "loss": 0.3577, "step": 15385 }, { "epoch": 2.23, "grad_norm": 8.801007270812988, "learning_rate": 3.3054010410130927e-07, "loss": 0.3887, "step": 15386 }, { "epoch": 2.23, "grad_norm": 7.69019889831543, "learning_rate": 3.304225325581049e-07, "loss": 0.3159, "step": 15387 }, { "epoch": 2.23, "grad_norm": 9.594077110290527, "learning_rate": 3.3030497779007585e-07, "loss": 0.3819, "step": 15388 }, { "epoch": 2.23, "grad_norm": 7.736692428588867, "learning_rate": 3.301874398001673e-07, "loss": 0.3076, "step": 15389 }, { "epoch": 2.23, "grad_norm": 9.182395935058594, "learning_rate": 3.3006991859132414e-07, "loss": 0.3511, "step": 15390 }, { "epoch": 2.23, "grad_norm": 7.649592876434326, "learning_rate": 3.2995241416649066e-07, "loss": 0.3264, "step": 15391 }, { "epoch": 2.23, "grad_norm": 7.977896213531494, "learning_rate": 3.298349265286108e-07, "loss": 0.3448, "step": 15392 }, { "epoch": 2.23, "grad_norm": 8.451062202453613, "learning_rate": 3.297174556806279e-07, "loss": 0.3787, "step": 15393 }, { "epoch": 2.23, "grad_norm": 9.76851749420166, "learning_rate": 3.2960000162548506e-07, "loss": 0.3892, "step": 15394 }, { "epoch": 2.23, "grad_norm": 8.475336074829102, "learning_rate": 3.2948256436612485e-07, "loss": 0.3315, "step": 15395 }, { "epoch": 2.23, "grad_norm": 7.928587913513184, "learning_rate": 3.2936514390548955e-07, "loss": 0.3496, "step": 15396 }, { "epoch": 2.23, "grad_norm": 8.074554443359375, "learning_rate": 3.2924774024652123e-07, "loss": 0.3489, "step": 15397 }, { "epoch": 2.23, "grad_norm": 10.158935546875, "learning_rate": 3.291303533921602e-07, "loss": 0.4144, "step": 15398 }, { "epoch": 2.23, "grad_norm": 8.358412742614746, "learning_rate": 3.2901298334534886e-07, "loss": 0.3577, "step": 15399 }, { "epoch": 2.23, "grad_norm": 8.144745826721191, "learning_rate": 3.2889563010902634e-07, "loss": 0.3403, "step": 15400 }, { "epoch": 2.23, "grad_norm": 8.701370239257812, "learning_rate": 3.2877829368613365e-07, "loss": 0.3764, "step": 15401 }, { "epoch": 2.23, "grad_norm": 7.639495849609375, "learning_rate": 3.286609740796105e-07, "loss": 0.321, "step": 15402 }, { "epoch": 2.23, "grad_norm": 7.805147647857666, "learning_rate": 3.285436712923952e-07, "loss": 0.3418, "step": 15403 }, { "epoch": 2.24, "grad_norm": 7.973447799682617, "learning_rate": 3.2842638532742794e-07, "loss": 0.3504, "step": 15404 }, { "epoch": 2.24, "grad_norm": 7.526615619659424, "learning_rate": 3.2830911618764567e-07, "loss": 0.3417, "step": 15405 }, { "epoch": 2.24, "grad_norm": 8.993931770324707, "learning_rate": 3.281918638759877e-07, "loss": 0.4006, "step": 15406 }, { "epoch": 2.24, "grad_norm": 7.715199947357178, "learning_rate": 3.280746283953905e-07, "loss": 0.3213, "step": 15407 }, { "epoch": 2.24, "grad_norm": 9.503294944763184, "learning_rate": 3.2795740974879227e-07, "loss": 0.3025, "step": 15408 }, { "epoch": 2.24, "grad_norm": 8.086363792419434, "learning_rate": 3.2784020793912895e-07, "loss": 0.3139, "step": 15409 }, { "epoch": 2.24, "grad_norm": 9.390474319458008, "learning_rate": 3.2772302296933707e-07, "loss": 0.4125, "step": 15410 }, { "epoch": 2.24, "grad_norm": 8.696961402893066, "learning_rate": 3.2760585484235235e-07, "loss": 0.3351, "step": 15411 }, { "epoch": 2.24, "grad_norm": 8.93355941772461, "learning_rate": 3.2748870356111055e-07, "loss": 0.3658, "step": 15412 }, { "epoch": 2.24, "grad_norm": 7.5813307762146, "learning_rate": 3.273715691285466e-07, "loss": 0.2744, "step": 15413 }, { "epoch": 2.24, "grad_norm": 8.686500549316406, "learning_rate": 3.27254451547595e-07, "loss": 0.3411, "step": 15414 }, { "epoch": 2.24, "grad_norm": 9.038118362426758, "learning_rate": 3.271373508211901e-07, "loss": 0.3441, "step": 15415 }, { "epoch": 2.24, "grad_norm": 8.028489112854004, "learning_rate": 3.270202669522655e-07, "loss": 0.3451, "step": 15416 }, { "epoch": 2.24, "grad_norm": 7.783565521240234, "learning_rate": 3.269031999437547e-07, "loss": 0.3084, "step": 15417 }, { "epoch": 2.24, "grad_norm": 7.884927749633789, "learning_rate": 3.2678614979859085e-07, "loss": 0.34, "step": 15418 }, { "epoch": 2.24, "grad_norm": 7.930122375488281, "learning_rate": 3.266691165197055e-07, "loss": 0.2818, "step": 15419 }, { "epoch": 2.24, "grad_norm": 8.259740829467773, "learning_rate": 3.265521001100321e-07, "loss": 0.3075, "step": 15420 }, { "epoch": 2.24, "grad_norm": 8.688180923461914, "learning_rate": 3.264351005725008e-07, "loss": 0.3462, "step": 15421 }, { "epoch": 2.24, "grad_norm": 8.189838409423828, "learning_rate": 3.263181179100444e-07, "loss": 0.3604, "step": 15422 }, { "epoch": 2.24, "grad_norm": 8.167753219604492, "learning_rate": 3.2620115212559264e-07, "loss": 0.3578, "step": 15423 }, { "epoch": 2.24, "grad_norm": 9.628679275512695, "learning_rate": 3.2608420322207607e-07, "loss": 0.3612, "step": 15424 }, { "epoch": 2.24, "grad_norm": 8.352988243103027, "learning_rate": 3.2596727120242483e-07, "loss": 0.3427, "step": 15425 }, { "epoch": 2.24, "grad_norm": 8.887925148010254, "learning_rate": 3.2585035606956855e-07, "loss": 0.3508, "step": 15426 }, { "epoch": 2.24, "grad_norm": 8.491896629333496, "learning_rate": 3.257334578264361e-07, "loss": 0.3513, "step": 15427 }, { "epoch": 2.24, "grad_norm": 8.207118034362793, "learning_rate": 3.2561657647595633e-07, "loss": 0.3611, "step": 15428 }, { "epoch": 2.24, "grad_norm": 9.037508010864258, "learning_rate": 3.254997120210575e-07, "loss": 0.4123, "step": 15429 }, { "epoch": 2.24, "grad_norm": 9.538633346557617, "learning_rate": 3.2538286446466743e-07, "loss": 0.3646, "step": 15430 }, { "epoch": 2.24, "grad_norm": 8.909263610839844, "learning_rate": 3.2526603380971353e-07, "loss": 0.4011, "step": 15431 }, { "epoch": 2.24, "grad_norm": 8.078531265258789, "learning_rate": 3.251492200591228e-07, "loss": 0.3181, "step": 15432 }, { "epoch": 2.24, "grad_norm": 8.751358985900879, "learning_rate": 3.250324232158219e-07, "loss": 0.3675, "step": 15433 }, { "epoch": 2.24, "grad_norm": 8.045656204223633, "learning_rate": 3.249156432827369e-07, "loss": 0.3082, "step": 15434 }, { "epoch": 2.24, "grad_norm": 7.529475688934326, "learning_rate": 3.2479888026279357e-07, "loss": 0.3014, "step": 15435 }, { "epoch": 2.24, "grad_norm": 10.349294662475586, "learning_rate": 3.2468213415891723e-07, "loss": 0.4338, "step": 15436 }, { "epoch": 2.24, "grad_norm": 7.722566604614258, "learning_rate": 3.2456540497403294e-07, "loss": 0.2875, "step": 15437 }, { "epoch": 2.24, "grad_norm": 9.0237398147583, "learning_rate": 3.2444869271106446e-07, "loss": 0.3906, "step": 15438 }, { "epoch": 2.24, "grad_norm": 7.710055351257324, "learning_rate": 3.2433199737293694e-07, "loss": 0.322, "step": 15439 }, { "epoch": 2.24, "grad_norm": 7.6299638748168945, "learning_rate": 3.242153189625727e-07, "loss": 0.3045, "step": 15440 }, { "epoch": 2.24, "grad_norm": 9.618902206420898, "learning_rate": 3.2409865748289634e-07, "loss": 0.3556, "step": 15441 }, { "epoch": 2.24, "grad_norm": 8.59057903289795, "learning_rate": 3.2398201293682915e-07, "loss": 0.3012, "step": 15442 }, { "epoch": 2.24, "grad_norm": 8.882445335388184, "learning_rate": 3.238653853272949e-07, "loss": 0.3088, "step": 15443 }, { "epoch": 2.24, "grad_norm": 9.435013771057129, "learning_rate": 3.2374877465721453e-07, "loss": 0.3367, "step": 15444 }, { "epoch": 2.24, "grad_norm": 8.507984161376953, "learning_rate": 3.236321809295097e-07, "loss": 0.3736, "step": 15445 }, { "epoch": 2.24, "grad_norm": 8.9644775390625, "learning_rate": 3.235156041471018e-07, "loss": 0.2965, "step": 15446 }, { "epoch": 2.24, "grad_norm": 9.265467643737793, "learning_rate": 3.233990443129111e-07, "loss": 0.4127, "step": 15447 }, { "epoch": 2.24, "grad_norm": 8.251866340637207, "learning_rate": 3.2328250142985804e-07, "loss": 0.3222, "step": 15448 }, { "epoch": 2.24, "grad_norm": 9.628676414489746, "learning_rate": 3.2316597550086235e-07, "loss": 0.3784, "step": 15449 }, { "epoch": 2.24, "grad_norm": 8.00336742401123, "learning_rate": 3.2304946652884337e-07, "loss": 0.3394, "step": 15450 }, { "epoch": 2.24, "grad_norm": 7.685344696044922, "learning_rate": 3.229329745167201e-07, "loss": 0.3249, "step": 15451 }, { "epoch": 2.24, "grad_norm": 8.318543434143066, "learning_rate": 3.2281649946741097e-07, "loss": 0.3271, "step": 15452 }, { "epoch": 2.24, "grad_norm": 9.816160202026367, "learning_rate": 3.2270004138383424e-07, "loss": 0.3572, "step": 15453 }, { "epoch": 2.24, "grad_norm": 8.796704292297363, "learning_rate": 3.225836002689074e-07, "loss": 0.4435, "step": 15454 }, { "epoch": 2.24, "grad_norm": 7.480320930480957, "learning_rate": 3.2246717612554783e-07, "loss": 0.339, "step": 15455 }, { "epoch": 2.24, "grad_norm": 8.368573188781738, "learning_rate": 3.2235076895667237e-07, "loss": 0.3038, "step": 15456 }, { "epoch": 2.24, "grad_norm": 8.423826217651367, "learning_rate": 3.222343787651972e-07, "loss": 0.349, "step": 15457 }, { "epoch": 2.24, "grad_norm": 8.225144386291504, "learning_rate": 3.221180055540388e-07, "loss": 0.3478, "step": 15458 }, { "epoch": 2.24, "grad_norm": 8.257488250732422, "learning_rate": 3.220016493261116e-07, "loss": 0.3737, "step": 15459 }, { "epoch": 2.24, "grad_norm": 7.686059474945068, "learning_rate": 3.2188531008433215e-07, "loss": 0.3047, "step": 15460 }, { "epoch": 2.24, "grad_norm": 8.49964427947998, "learning_rate": 3.2176898783161375e-07, "loss": 0.3312, "step": 15461 }, { "epoch": 2.24, "grad_norm": 10.912395477294922, "learning_rate": 3.2165268257087206e-07, "loss": 0.4176, "step": 15462 }, { "epoch": 2.24, "grad_norm": 8.06575870513916, "learning_rate": 3.215363943050198e-07, "loss": 0.3423, "step": 15463 }, { "epoch": 2.24, "grad_norm": 9.913020133972168, "learning_rate": 3.2142012303697087e-07, "loss": 0.3611, "step": 15464 }, { "epoch": 2.24, "grad_norm": 8.47026538848877, "learning_rate": 3.213038687696381e-07, "loss": 0.3822, "step": 15465 }, { "epoch": 2.24, "grad_norm": 9.121015548706055, "learning_rate": 3.211876315059342e-07, "loss": 0.3608, "step": 15466 }, { "epoch": 2.24, "grad_norm": 9.557633399963379, "learning_rate": 3.2107141124877123e-07, "loss": 0.3601, "step": 15467 }, { "epoch": 2.24, "grad_norm": 9.159807205200195, "learning_rate": 3.209552080010609e-07, "loss": 0.3556, "step": 15468 }, { "epoch": 2.24, "grad_norm": 8.265815734863281, "learning_rate": 3.2083902176571456e-07, "loss": 0.3136, "step": 15469 }, { "epoch": 2.24, "grad_norm": 8.944751739501953, "learning_rate": 3.207228525456429e-07, "loss": 0.33, "step": 15470 }, { "epoch": 2.24, "grad_norm": 8.712297439575195, "learning_rate": 3.2060670034375657e-07, "loss": 0.3598, "step": 15471 }, { "epoch": 2.24, "grad_norm": 8.23837661743164, "learning_rate": 3.204905651629658e-07, "loss": 0.3702, "step": 15472 }, { "epoch": 2.25, "grad_norm": 7.953345775604248, "learning_rate": 3.203744470061791e-07, "loss": 0.3117, "step": 15473 }, { "epoch": 2.25, "grad_norm": 8.328441619873047, "learning_rate": 3.2025834587630695e-07, "loss": 0.398, "step": 15474 }, { "epoch": 2.25, "grad_norm": 8.710356712341309, "learning_rate": 3.2014226177625694e-07, "loss": 0.3415, "step": 15475 }, { "epoch": 2.25, "grad_norm": 8.438793182373047, "learning_rate": 3.2002619470893857e-07, "loss": 0.3663, "step": 15476 }, { "epoch": 2.25, "grad_norm": 8.440832138061523, "learning_rate": 3.1991014467725827e-07, "loss": 0.3469, "step": 15477 }, { "epoch": 2.25, "grad_norm": 9.651713371276855, "learning_rate": 3.197941116841251e-07, "loss": 0.4058, "step": 15478 }, { "epoch": 2.25, "grad_norm": 8.688196182250977, "learning_rate": 3.1967809573244473e-07, "loss": 0.3876, "step": 15479 }, { "epoch": 2.25, "grad_norm": 8.549938201904297, "learning_rate": 3.1956209682512403e-07, "loss": 0.3847, "step": 15480 }, { "epoch": 2.25, "grad_norm": 7.8732008934021, "learning_rate": 3.194461149650701e-07, "loss": 0.3178, "step": 15481 }, { "epoch": 2.25, "grad_norm": 8.644797325134277, "learning_rate": 3.193301501551873e-07, "loss": 0.3574, "step": 15482 }, { "epoch": 2.25, "grad_norm": 8.949540138244629, "learning_rate": 3.1921420239838215e-07, "loss": 0.4009, "step": 15483 }, { "epoch": 2.25, "grad_norm": 8.695666313171387, "learning_rate": 3.1909827169755876e-07, "loss": 0.3515, "step": 15484 }, { "epoch": 2.25, "grad_norm": 8.807040214538574, "learning_rate": 3.189823580556219e-07, "loss": 0.3509, "step": 15485 }, { "epoch": 2.25, "grad_norm": 8.21571159362793, "learning_rate": 3.1886646147547546e-07, "loss": 0.3571, "step": 15486 }, { "epoch": 2.25, "grad_norm": 8.32768440246582, "learning_rate": 3.18750581960023e-07, "loss": 0.319, "step": 15487 }, { "epoch": 2.25, "grad_norm": 8.934679985046387, "learning_rate": 3.186347195121679e-07, "loss": 0.372, "step": 15488 }, { "epoch": 2.25, "grad_norm": 9.807043075561523, "learning_rate": 3.185188741348128e-07, "loss": 0.3805, "step": 15489 }, { "epoch": 2.25, "grad_norm": 8.8118257522583, "learning_rate": 3.1840304583086e-07, "loss": 0.3681, "step": 15490 }, { "epoch": 2.25, "grad_norm": 8.27359390258789, "learning_rate": 3.182872346032114e-07, "loss": 0.3484, "step": 15491 }, { "epoch": 2.25, "grad_norm": 10.485553741455078, "learning_rate": 3.1817144045476853e-07, "loss": 0.3615, "step": 15492 }, { "epoch": 2.25, "grad_norm": 7.794796466827393, "learning_rate": 3.180556633884326e-07, "loss": 0.3762, "step": 15493 }, { "epoch": 2.25, "grad_norm": 9.098508834838867, "learning_rate": 3.1793990340710343e-07, "loss": 0.3616, "step": 15494 }, { "epoch": 2.25, "grad_norm": 8.820147514343262, "learning_rate": 3.178241605136823e-07, "loss": 0.3724, "step": 15495 }, { "epoch": 2.25, "grad_norm": 7.91278600692749, "learning_rate": 3.177084347110678e-07, "loss": 0.333, "step": 15496 }, { "epoch": 2.25, "grad_norm": 8.104166030883789, "learning_rate": 3.175927260021605e-07, "loss": 0.3519, "step": 15497 }, { "epoch": 2.25, "grad_norm": 9.370696067810059, "learning_rate": 3.174770343898581e-07, "loss": 0.4271, "step": 15498 }, { "epoch": 2.25, "grad_norm": 9.039917945861816, "learning_rate": 3.1736135987706014e-07, "loss": 0.4239, "step": 15499 }, { "epoch": 2.25, "grad_norm": 7.949802875518799, "learning_rate": 3.1724570246666384e-07, "loss": 0.358, "step": 15500 }, { "epoch": 2.25, "grad_norm": 9.068690299987793, "learning_rate": 3.1713006216156714e-07, "loss": 0.3275, "step": 15501 }, { "epoch": 2.25, "grad_norm": 8.544914245605469, "learning_rate": 3.170144389646672e-07, "loss": 0.3617, "step": 15502 }, { "epoch": 2.25, "grad_norm": 9.642536163330078, "learning_rate": 3.1689883287886067e-07, "loss": 0.3432, "step": 15503 }, { "epoch": 2.25, "grad_norm": 7.728343963623047, "learning_rate": 3.1678324390704403e-07, "loss": 0.3499, "step": 15504 }, { "epoch": 2.25, "grad_norm": 7.868088245391846, "learning_rate": 3.166676720521131e-07, "loss": 0.3394, "step": 15505 }, { "epoch": 2.25, "grad_norm": 7.8515825271606445, "learning_rate": 3.165521173169633e-07, "loss": 0.3531, "step": 15506 }, { "epoch": 2.25, "grad_norm": 7.355618000030518, "learning_rate": 3.1643657970448977e-07, "loss": 0.3126, "step": 15507 }, { "epoch": 2.25, "grad_norm": 9.029708862304688, "learning_rate": 3.16321059217587e-07, "loss": 0.3977, "step": 15508 }, { "epoch": 2.25, "grad_norm": 8.927101135253906, "learning_rate": 3.162055558591493e-07, "loss": 0.3744, "step": 15509 }, { "epoch": 2.25, "grad_norm": 8.645574569702148, "learning_rate": 3.160900696320703e-07, "loss": 0.3234, "step": 15510 }, { "epoch": 2.25, "grad_norm": 7.57262659072876, "learning_rate": 3.159746005392434e-07, "loss": 0.2908, "step": 15511 }, { "epoch": 2.25, "grad_norm": 8.742036819458008, "learning_rate": 3.1585914858356145e-07, "loss": 0.3095, "step": 15512 }, { "epoch": 2.25, "grad_norm": 8.87120532989502, "learning_rate": 3.1574371376791696e-07, "loss": 0.345, "step": 15513 }, { "epoch": 2.25, "grad_norm": 9.048836708068848, "learning_rate": 3.1562829609520215e-07, "loss": 0.3777, "step": 15514 }, { "epoch": 2.25, "grad_norm": 8.603590965270996, "learning_rate": 3.155128955683078e-07, "loss": 0.3706, "step": 15515 }, { "epoch": 2.25, "grad_norm": 9.912013053894043, "learning_rate": 3.1539751219012635e-07, "loss": 0.4047, "step": 15516 }, { "epoch": 2.25, "grad_norm": 8.9813871383667, "learning_rate": 3.152821459635472e-07, "loss": 0.3319, "step": 15517 }, { "epoch": 2.25, "grad_norm": 8.717999458312988, "learning_rate": 3.1516679689146195e-07, "loss": 0.35, "step": 15518 }, { "epoch": 2.25, "grad_norm": 8.593589782714844, "learning_rate": 3.150514649767596e-07, "loss": 0.3199, "step": 15519 }, { "epoch": 2.25, "grad_norm": 9.893532752990723, "learning_rate": 3.149361502223299e-07, "loss": 0.4152, "step": 15520 }, { "epoch": 2.25, "grad_norm": 9.26186466217041, "learning_rate": 3.148208526310617e-07, "loss": 0.34, "step": 15521 }, { "epoch": 2.25, "grad_norm": 8.18686294555664, "learning_rate": 3.147055722058438e-07, "loss": 0.3633, "step": 15522 }, { "epoch": 2.25, "grad_norm": 7.570034027099609, "learning_rate": 3.145903089495644e-07, "loss": 0.343, "step": 15523 }, { "epoch": 2.25, "grad_norm": 9.634593963623047, "learning_rate": 3.1447506286511104e-07, "loss": 0.4072, "step": 15524 }, { "epoch": 2.25, "grad_norm": 10.369535446166992, "learning_rate": 3.1435983395537115e-07, "loss": 0.3851, "step": 15525 }, { "epoch": 2.25, "grad_norm": 8.659049987792969, "learning_rate": 3.1424462222323153e-07, "loss": 0.3442, "step": 15526 }, { "epoch": 2.25, "grad_norm": 7.788995265960693, "learning_rate": 3.141294276715788e-07, "loss": 0.3439, "step": 15527 }, { "epoch": 2.25, "grad_norm": 10.9099760055542, "learning_rate": 3.14014250303299e-07, "loss": 0.425, "step": 15528 }, { "epoch": 2.25, "grad_norm": 8.972090721130371, "learning_rate": 3.13899090121277e-07, "loss": 0.4103, "step": 15529 }, { "epoch": 2.25, "grad_norm": 8.673290252685547, "learning_rate": 3.1378394712839904e-07, "loss": 0.347, "step": 15530 }, { "epoch": 2.25, "grad_norm": 9.203719139099121, "learning_rate": 3.136688213275488e-07, "loss": 0.3799, "step": 15531 }, { "epoch": 2.25, "grad_norm": 9.155820846557617, "learning_rate": 3.1355371272161126e-07, "loss": 0.3936, "step": 15532 }, { "epoch": 2.25, "grad_norm": 8.240361213684082, "learning_rate": 3.134386213134702e-07, "loss": 0.3345, "step": 15533 }, { "epoch": 2.25, "grad_norm": 7.018031120300293, "learning_rate": 3.133235471060088e-07, "loss": 0.3182, "step": 15534 }, { "epoch": 2.25, "grad_norm": 8.55919075012207, "learning_rate": 3.132084901021107e-07, "loss": 0.4167, "step": 15535 }, { "epoch": 2.25, "grad_norm": 8.337538719177246, "learning_rate": 3.130934503046572e-07, "loss": 0.3585, "step": 15536 }, { "epoch": 2.25, "grad_norm": 9.595659255981445, "learning_rate": 3.129784277165318e-07, "loss": 0.4116, "step": 15537 }, { "epoch": 2.25, "grad_norm": 8.018596649169922, "learning_rate": 3.12863422340615e-07, "loss": 0.3067, "step": 15538 }, { "epoch": 2.25, "grad_norm": 8.014185905456543, "learning_rate": 3.1274843417978934e-07, "loss": 0.3448, "step": 15539 }, { "epoch": 2.25, "grad_norm": 9.4622220993042, "learning_rate": 3.1263346323693473e-07, "loss": 0.3748, "step": 15540 }, { "epoch": 2.25, "grad_norm": 8.343865394592285, "learning_rate": 3.1251850951493175e-07, "loss": 0.2863, "step": 15541 }, { "epoch": 2.26, "grad_norm": 7.875410556793213, "learning_rate": 3.124035730166605e-07, "loss": 0.3491, "step": 15542 }, { "epoch": 2.26, "grad_norm": 8.198990821838379, "learning_rate": 3.1228865374500056e-07, "loss": 0.3433, "step": 15543 }, { "epoch": 2.26, "grad_norm": 8.561196327209473, "learning_rate": 3.121737517028309e-07, "loss": 0.4101, "step": 15544 }, { "epoch": 2.26, "grad_norm": 9.069955825805664, "learning_rate": 3.120588668930304e-07, "loss": 0.355, "step": 15545 }, { "epoch": 2.26, "grad_norm": 8.781098365783691, "learning_rate": 3.1194399931847716e-07, "loss": 0.3873, "step": 15546 }, { "epoch": 2.26, "grad_norm": 8.253024101257324, "learning_rate": 3.1182914898204893e-07, "loss": 0.3587, "step": 15547 }, { "epoch": 2.26, "grad_norm": 8.350749015808105, "learning_rate": 3.1171431588662333e-07, "loss": 0.3269, "step": 15548 }, { "epoch": 2.26, "grad_norm": 10.545280456542969, "learning_rate": 3.1159950003507754e-07, "loss": 0.4867, "step": 15549 }, { "epoch": 2.26, "grad_norm": 7.9633989334106445, "learning_rate": 3.1148470143028706e-07, "loss": 0.2778, "step": 15550 }, { "epoch": 2.26, "grad_norm": 8.784265518188477, "learning_rate": 3.1136992007512916e-07, "loss": 0.3411, "step": 15551 }, { "epoch": 2.26, "grad_norm": 8.394661903381348, "learning_rate": 3.112551559724785e-07, "loss": 0.3417, "step": 15552 }, { "epoch": 2.26, "grad_norm": 8.336296081542969, "learning_rate": 3.1114040912521155e-07, "loss": 0.3762, "step": 15553 }, { "epoch": 2.26, "grad_norm": 8.141256332397461, "learning_rate": 3.11025679536202e-07, "loss": 0.3489, "step": 15554 }, { "epoch": 2.26, "grad_norm": 8.8342924118042, "learning_rate": 3.1091096720832445e-07, "loss": 0.3783, "step": 15555 }, { "epoch": 2.26, "grad_norm": 7.4398512840271, "learning_rate": 3.1079627214445303e-07, "loss": 0.3198, "step": 15556 }, { "epoch": 2.26, "grad_norm": 9.145394325256348, "learning_rate": 3.106815943474612e-07, "loss": 0.3693, "step": 15557 }, { "epoch": 2.26, "grad_norm": 8.918512344360352, "learning_rate": 3.10566933820222e-07, "loss": 0.4084, "step": 15558 }, { "epoch": 2.26, "grad_norm": 8.785209655761719, "learning_rate": 3.1045229056560776e-07, "loss": 0.3643, "step": 15559 }, { "epoch": 2.26, "grad_norm": 9.518757820129395, "learning_rate": 3.103376645864916e-07, "loss": 0.3257, "step": 15560 }, { "epoch": 2.26, "grad_norm": 9.703276634216309, "learning_rate": 3.1022305588574436e-07, "loss": 0.4412, "step": 15561 }, { "epoch": 2.26, "grad_norm": 7.386748790740967, "learning_rate": 3.1010846446623775e-07, "loss": 0.3198, "step": 15562 }, { "epoch": 2.26, "grad_norm": 8.191615104675293, "learning_rate": 3.0999389033084245e-07, "loss": 0.3075, "step": 15563 }, { "epoch": 2.26, "grad_norm": 7.847204685211182, "learning_rate": 3.0987933348242924e-07, "loss": 0.3257, "step": 15564 }, { "epoch": 2.26, "grad_norm": 8.109237670898438, "learning_rate": 3.097647939238679e-07, "loss": 0.3071, "step": 15565 }, { "epoch": 2.26, "grad_norm": 7.1974358558654785, "learning_rate": 3.0965027165802813e-07, "loss": 0.3178, "step": 15566 }, { "epoch": 2.26, "grad_norm": 8.506264686584473, "learning_rate": 3.0953576668777916e-07, "loss": 0.2963, "step": 15567 }, { "epoch": 2.26, "grad_norm": 8.017067909240723, "learning_rate": 3.094212790159897e-07, "loss": 0.3518, "step": 15568 }, { "epoch": 2.26, "grad_norm": 8.758522033691406, "learning_rate": 3.0930680864552796e-07, "loss": 0.4061, "step": 15569 }, { "epoch": 2.26, "grad_norm": 10.669962882995605, "learning_rate": 3.0919235557926217e-07, "loss": 0.4359, "step": 15570 }, { "epoch": 2.26, "grad_norm": 7.642908573150635, "learning_rate": 3.090779198200588e-07, "loss": 0.3271, "step": 15571 }, { "epoch": 2.26, "grad_norm": 7.764242649078369, "learning_rate": 3.089635013707863e-07, "loss": 0.307, "step": 15572 }, { "epoch": 2.26, "grad_norm": 8.037799835205078, "learning_rate": 3.088491002343097e-07, "loss": 0.3226, "step": 15573 }, { "epoch": 2.26, "grad_norm": 9.976924896240234, "learning_rate": 3.087347164134966e-07, "loss": 0.3772, "step": 15574 }, { "epoch": 2.26, "grad_norm": 7.787374496459961, "learning_rate": 3.0862034991121164e-07, "loss": 0.3472, "step": 15575 }, { "epoch": 2.26, "grad_norm": 7.3595476150512695, "learning_rate": 3.085060007303204e-07, "loss": 0.319, "step": 15576 }, { "epoch": 2.26, "grad_norm": 10.105103492736816, "learning_rate": 3.0839166887368784e-07, "loss": 0.3396, "step": 15577 }, { "epoch": 2.26, "grad_norm": 8.471790313720703, "learning_rate": 3.082773543441781e-07, "loss": 0.3837, "step": 15578 }, { "epoch": 2.26, "grad_norm": 8.651219367980957, "learning_rate": 3.081630571446555e-07, "loss": 0.3273, "step": 15579 }, { "epoch": 2.26, "grad_norm": 7.944328784942627, "learning_rate": 3.0804877727798326e-07, "loss": 0.3922, "step": 15580 }, { "epoch": 2.26, "grad_norm": 9.10106372833252, "learning_rate": 3.079345147470247e-07, "loss": 0.3136, "step": 15581 }, { "epoch": 2.26, "grad_norm": 9.14255142211914, "learning_rate": 3.0782026955464234e-07, "loss": 0.3331, "step": 15582 }, { "epoch": 2.26, "grad_norm": 8.926953315734863, "learning_rate": 3.0770604170369854e-07, "loss": 0.3036, "step": 15583 }, { "epoch": 2.26, "grad_norm": 7.968119144439697, "learning_rate": 3.075918311970549e-07, "loss": 0.3344, "step": 15584 }, { "epoch": 2.26, "grad_norm": 9.048202514648438, "learning_rate": 3.0747763803757285e-07, "loss": 0.4125, "step": 15585 }, { "epoch": 2.26, "grad_norm": 8.719528198242188, "learning_rate": 3.0736346222811347e-07, "loss": 0.3392, "step": 15586 }, { "epoch": 2.26, "grad_norm": 8.751501083374023, "learning_rate": 3.07249303771537e-07, "loss": 0.3735, "step": 15587 }, { "epoch": 2.26, "grad_norm": 10.700409889221191, "learning_rate": 3.071351626707037e-07, "loss": 0.4097, "step": 15588 }, { "epoch": 2.26, "grad_norm": 8.562919616699219, "learning_rate": 3.070210389284732e-07, "loss": 0.3302, "step": 15589 }, { "epoch": 2.26, "grad_norm": 8.48355770111084, "learning_rate": 3.069069325477045e-07, "loss": 0.3179, "step": 15590 }, { "epoch": 2.26, "grad_norm": 8.639795303344727, "learning_rate": 3.067928435312567e-07, "loss": 0.3532, "step": 15591 }, { "epoch": 2.26, "grad_norm": 9.051717758178711, "learning_rate": 3.0667877188198733e-07, "loss": 0.3823, "step": 15592 }, { "epoch": 2.26, "grad_norm": 9.612521171569824, "learning_rate": 3.065647176027554e-07, "loss": 0.356, "step": 15593 }, { "epoch": 2.26, "grad_norm": 7.9792256355285645, "learning_rate": 3.064506806964171e-07, "loss": 0.3299, "step": 15594 }, { "epoch": 2.26, "grad_norm": 9.58707046508789, "learning_rate": 3.063366611658308e-07, "loss": 0.4172, "step": 15595 }, { "epoch": 2.26, "grad_norm": 7.4663615226745605, "learning_rate": 3.062226590138519e-07, "loss": 0.3257, "step": 15596 }, { "epoch": 2.26, "grad_norm": 9.167705535888672, "learning_rate": 3.061086742433372e-07, "loss": 0.3252, "step": 15597 }, { "epoch": 2.26, "grad_norm": 8.424144744873047, "learning_rate": 3.059947068571421e-07, "loss": 0.3449, "step": 15598 }, { "epoch": 2.26, "grad_norm": 8.672627449035645, "learning_rate": 3.0588075685812196e-07, "loss": 0.3826, "step": 15599 }, { "epoch": 2.26, "grad_norm": 8.181897163391113, "learning_rate": 3.057668242491317e-07, "loss": 0.3469, "step": 15600 }, { "epoch": 2.26, "grad_norm": 7.393815517425537, "learning_rate": 3.0565290903302566e-07, "loss": 0.3239, "step": 15601 }, { "epoch": 2.26, "grad_norm": 7.875992774963379, "learning_rate": 3.0553901121265766e-07, "loss": 0.3553, "step": 15602 }, { "epoch": 2.26, "grad_norm": 7.510636806488037, "learning_rate": 3.054251307908815e-07, "loss": 0.3156, "step": 15603 }, { "epoch": 2.26, "grad_norm": 8.001463890075684, "learning_rate": 3.0531126777055007e-07, "loss": 0.3535, "step": 15604 }, { "epoch": 2.26, "grad_norm": 8.629690170288086, "learning_rate": 3.0519742215451636e-07, "loss": 0.3445, "step": 15605 }, { "epoch": 2.26, "grad_norm": 7.4456787109375, "learning_rate": 3.0508359394563175e-07, "loss": 0.3714, "step": 15606 }, { "epoch": 2.26, "grad_norm": 9.222249984741211, "learning_rate": 3.0496978314674916e-07, "loss": 0.3877, "step": 15607 }, { "epoch": 2.26, "grad_norm": 8.475048065185547, "learning_rate": 3.048559897607187e-07, "loss": 0.351, "step": 15608 }, { "epoch": 2.26, "grad_norm": 7.077953815460205, "learning_rate": 3.0474221379039254e-07, "loss": 0.3372, "step": 15609 }, { "epoch": 2.26, "grad_norm": 9.069072723388672, "learning_rate": 3.0462845523862035e-07, "loss": 0.3849, "step": 15610 }, { "epoch": 2.27, "grad_norm": 7.454935073852539, "learning_rate": 3.045147141082519e-07, "loss": 0.3529, "step": 15611 }, { "epoch": 2.27, "grad_norm": 8.564888954162598, "learning_rate": 3.0440099040213794e-07, "loss": 0.3999, "step": 15612 }, { "epoch": 2.27, "grad_norm": 7.6519036293029785, "learning_rate": 3.042872841231262e-07, "loss": 0.343, "step": 15613 }, { "epoch": 2.27, "grad_norm": 7.849648475646973, "learning_rate": 3.0417359527406695e-07, "loss": 0.3419, "step": 15614 }, { "epoch": 2.27, "grad_norm": 8.089988708496094, "learning_rate": 3.0405992385780686e-07, "loss": 0.3037, "step": 15615 }, { "epoch": 2.27, "grad_norm": 7.924487113952637, "learning_rate": 3.039462698771953e-07, "loss": 0.3493, "step": 15616 }, { "epoch": 2.27, "grad_norm": 9.509748458862305, "learning_rate": 3.038326333350787e-07, "loss": 0.395, "step": 15617 }, { "epoch": 2.27, "grad_norm": 9.230435371398926, "learning_rate": 3.0371901423430434e-07, "loss": 0.3518, "step": 15618 }, { "epoch": 2.27, "grad_norm": 8.815305709838867, "learning_rate": 3.0360541257771865e-07, "loss": 0.3817, "step": 15619 }, { "epoch": 2.27, "grad_norm": 8.036857604980469, "learning_rate": 3.034918283681679e-07, "loss": 0.3463, "step": 15620 }, { "epoch": 2.27, "grad_norm": 8.309236526489258, "learning_rate": 3.0337826160849767e-07, "loss": 0.3232, "step": 15621 }, { "epoch": 2.27, "grad_norm": 8.74870491027832, "learning_rate": 3.032647123015532e-07, "loss": 0.3587, "step": 15622 }, { "epoch": 2.27, "grad_norm": 8.110127449035645, "learning_rate": 3.0315118045017917e-07, "loss": 0.3127, "step": 15623 }, { "epoch": 2.27, "grad_norm": 9.143004417419434, "learning_rate": 3.0303766605722025e-07, "loss": 0.3302, "step": 15624 }, { "epoch": 2.27, "grad_norm": 8.583813667297363, "learning_rate": 3.0292416912552e-07, "loss": 0.3416, "step": 15625 }, { "epoch": 2.27, "grad_norm": 8.639765739440918, "learning_rate": 3.0281068965792247e-07, "loss": 0.4317, "step": 15626 }, { "epoch": 2.27, "grad_norm": 8.20124340057373, "learning_rate": 3.026972276572697e-07, "loss": 0.3236, "step": 15627 }, { "epoch": 2.27, "grad_norm": 8.391217231750488, "learning_rate": 3.0258378312640544e-07, "loss": 0.3513, "step": 15628 }, { "epoch": 2.27, "grad_norm": 8.91872501373291, "learning_rate": 3.0247035606817075e-07, "loss": 0.4178, "step": 15629 }, { "epoch": 2.27, "grad_norm": 8.586997985839844, "learning_rate": 3.0235694648540843e-07, "loss": 0.3879, "step": 15630 }, { "epoch": 2.27, "grad_norm": 8.63180160522461, "learning_rate": 3.0224355438095906e-07, "loss": 0.3935, "step": 15631 }, { "epoch": 2.27, "grad_norm": 9.154070854187012, "learning_rate": 3.0213017975766365e-07, "loss": 0.3263, "step": 15632 }, { "epoch": 2.27, "grad_norm": 8.623223304748535, "learning_rate": 3.020168226183627e-07, "loss": 0.294, "step": 15633 }, { "epoch": 2.27, "grad_norm": 8.919775009155273, "learning_rate": 3.019034829658962e-07, "loss": 0.3497, "step": 15634 }, { "epoch": 2.27, "grad_norm": 8.769159317016602, "learning_rate": 3.017901608031036e-07, "loss": 0.365, "step": 15635 }, { "epoch": 2.27, "grad_norm": 7.597170352935791, "learning_rate": 3.016768561328237e-07, "loss": 0.3284, "step": 15636 }, { "epoch": 2.27, "grad_norm": 7.349710464477539, "learning_rate": 3.0156356895789615e-07, "loss": 0.3505, "step": 15637 }, { "epoch": 2.27, "grad_norm": 9.66159439086914, "learning_rate": 3.014502992811583e-07, "loss": 0.3538, "step": 15638 }, { "epoch": 2.27, "grad_norm": 7.548468112945557, "learning_rate": 3.0133704710544804e-07, "loss": 0.3161, "step": 15639 }, { "epoch": 2.27, "grad_norm": 8.01883316040039, "learning_rate": 3.0122381243360295e-07, "loss": 0.3504, "step": 15640 }, { "epoch": 2.27, "grad_norm": 7.571699142456055, "learning_rate": 3.0111059526845986e-07, "loss": 0.3492, "step": 15641 }, { "epoch": 2.27, "grad_norm": 8.697537422180176, "learning_rate": 3.009973956128552e-07, "loss": 0.3899, "step": 15642 }, { "epoch": 2.27, "grad_norm": 9.171306610107422, "learning_rate": 3.00884213469625e-07, "loss": 0.3595, "step": 15643 }, { "epoch": 2.27, "grad_norm": 8.691526412963867, "learning_rate": 3.00771048841605e-07, "loss": 0.3369, "step": 15644 }, { "epoch": 2.27, "grad_norm": 8.097728729248047, "learning_rate": 3.0065790173163006e-07, "loss": 0.3622, "step": 15645 }, { "epoch": 2.27, "grad_norm": 7.545347213745117, "learning_rate": 3.005447721425353e-07, "loss": 0.3193, "step": 15646 }, { "epoch": 2.27, "grad_norm": 8.082762718200684, "learning_rate": 3.0043166007715494e-07, "loss": 0.3162, "step": 15647 }, { "epoch": 2.27, "grad_norm": 8.812517166137695, "learning_rate": 3.003185655383221e-07, "loss": 0.3857, "step": 15648 }, { "epoch": 2.27, "grad_norm": 8.759522438049316, "learning_rate": 3.002054885288713e-07, "loss": 0.4003, "step": 15649 }, { "epoch": 2.27, "grad_norm": 9.711776733398438, "learning_rate": 3.0009242905163434e-07, "loss": 0.4578, "step": 15650 }, { "epoch": 2.27, "grad_norm": 9.603716850280762, "learning_rate": 2.9997938710944504e-07, "loss": 0.3558, "step": 15651 }, { "epoch": 2.27, "grad_norm": 8.196374893188477, "learning_rate": 2.998663627051344e-07, "loss": 0.3208, "step": 15652 }, { "epoch": 2.27, "grad_norm": 8.303272247314453, "learning_rate": 2.997533558415344e-07, "loss": 0.3341, "step": 15653 }, { "epoch": 2.27, "grad_norm": 7.438976287841797, "learning_rate": 2.9964036652147637e-07, "loss": 0.3299, "step": 15654 }, { "epoch": 2.27, "grad_norm": 7.731884002685547, "learning_rate": 2.9952739474779097e-07, "loss": 0.3205, "step": 15655 }, { "epoch": 2.27, "grad_norm": 8.144291877746582, "learning_rate": 2.9941444052330857e-07, "loss": 0.3731, "step": 15656 }, { "epoch": 2.27, "grad_norm": 8.381021499633789, "learning_rate": 2.9930150385085893e-07, "loss": 0.3766, "step": 15657 }, { "epoch": 2.27, "grad_norm": 9.251941680908203, "learning_rate": 2.991885847332717e-07, "loss": 0.362, "step": 15658 }, { "epoch": 2.27, "grad_norm": 7.909496307373047, "learning_rate": 2.9907568317337575e-07, "loss": 0.33, "step": 15659 }, { "epoch": 2.27, "grad_norm": 9.095490455627441, "learning_rate": 2.989627991739997e-07, "loss": 0.4608, "step": 15660 }, { "epoch": 2.27, "grad_norm": 7.643837928771973, "learning_rate": 2.9884993273797187e-07, "loss": 0.3564, "step": 15661 }, { "epoch": 2.27, "grad_norm": 8.821453094482422, "learning_rate": 2.9873708386811924e-07, "loss": 0.351, "step": 15662 }, { "epoch": 2.27, "grad_norm": 7.489406108856201, "learning_rate": 2.9862425256726975e-07, "loss": 0.3473, "step": 15663 }, { "epoch": 2.27, "grad_norm": 8.772262573242188, "learning_rate": 2.9851143883825003e-07, "loss": 0.3795, "step": 15664 }, { "epoch": 2.27, "grad_norm": 7.6912455558776855, "learning_rate": 2.983986426838864e-07, "loss": 0.3615, "step": 15665 }, { "epoch": 2.27, "grad_norm": 8.18497085571289, "learning_rate": 2.982858641070051e-07, "loss": 0.3799, "step": 15666 }, { "epoch": 2.27, "grad_norm": 9.696209907531738, "learning_rate": 2.9817310311043066e-07, "loss": 0.3198, "step": 15667 }, { "epoch": 2.27, "grad_norm": 8.585658073425293, "learning_rate": 2.980603596969895e-07, "loss": 0.3777, "step": 15668 }, { "epoch": 2.27, "grad_norm": 10.192389488220215, "learning_rate": 2.979476338695048e-07, "loss": 0.389, "step": 15669 }, { "epoch": 2.27, "grad_norm": 9.084426879882812, "learning_rate": 2.9783492563080204e-07, "loss": 0.2853, "step": 15670 }, { "epoch": 2.27, "grad_norm": 9.243616104125977, "learning_rate": 2.977222349837037e-07, "loss": 0.3083, "step": 15671 }, { "epoch": 2.27, "grad_norm": 9.43753719329834, "learning_rate": 2.9760956193103424e-07, "loss": 0.416, "step": 15672 }, { "epoch": 2.27, "grad_norm": 9.258925437927246, "learning_rate": 2.974969064756158e-07, "loss": 0.3732, "step": 15673 }, { "epoch": 2.27, "grad_norm": 8.81238842010498, "learning_rate": 2.973842686202708e-07, "loss": 0.3839, "step": 15674 }, { "epoch": 2.27, "grad_norm": 7.741360664367676, "learning_rate": 2.9727164836782137e-07, "loss": 0.3236, "step": 15675 }, { "epoch": 2.27, "grad_norm": 8.407209396362305, "learning_rate": 2.9715904572108897e-07, "loss": 0.3501, "step": 15676 }, { "epoch": 2.27, "grad_norm": 9.47602367401123, "learning_rate": 2.9704646068289464e-07, "loss": 0.354, "step": 15677 }, { "epoch": 2.27, "grad_norm": 9.131322860717773, "learning_rate": 2.96933893256059e-07, "loss": 0.3891, "step": 15678 }, { "epoch": 2.27, "grad_norm": 8.623156547546387, "learning_rate": 2.9682134344340235e-07, "loss": 0.3378, "step": 15679 }, { "epoch": 2.28, "grad_norm": 9.145522117614746, "learning_rate": 2.967088112477445e-07, "loss": 0.395, "step": 15680 }, { "epoch": 2.28, "grad_norm": 8.886651039123535, "learning_rate": 2.9659629667190455e-07, "loss": 0.3583, "step": 15681 }, { "epoch": 2.28, "grad_norm": 8.372520446777344, "learning_rate": 2.964837997187019e-07, "loss": 0.3729, "step": 15682 }, { "epoch": 2.28, "grad_norm": 9.524499893188477, "learning_rate": 2.963713203909538e-07, "loss": 0.4159, "step": 15683 }, { "epoch": 2.28, "grad_norm": 8.264687538146973, "learning_rate": 2.962588586914797e-07, "loss": 0.3893, "step": 15684 }, { "epoch": 2.28, "grad_norm": 7.7413411140441895, "learning_rate": 2.9614641462309573e-07, "loss": 0.3673, "step": 15685 }, { "epoch": 2.28, "grad_norm": 8.075664520263672, "learning_rate": 2.9603398818862037e-07, "loss": 0.3229, "step": 15686 }, { "epoch": 2.28, "grad_norm": 9.602523803710938, "learning_rate": 2.9592157939086926e-07, "loss": 0.3694, "step": 15687 }, { "epoch": 2.28, "grad_norm": 7.701642036437988, "learning_rate": 2.958091882326588e-07, "loss": 0.3097, "step": 15688 }, { "epoch": 2.28, "grad_norm": 7.929319858551025, "learning_rate": 2.956968147168054e-07, "loss": 0.327, "step": 15689 }, { "epoch": 2.28, "grad_norm": 8.299768447875977, "learning_rate": 2.9558445884612337e-07, "loss": 0.3315, "step": 15690 }, { "epoch": 2.28, "grad_norm": 10.122862815856934, "learning_rate": 2.9547212062342885e-07, "loss": 0.4144, "step": 15691 }, { "epoch": 2.28, "grad_norm": 8.548938751220703, "learning_rate": 2.9535980005153516e-07, "loss": 0.3893, "step": 15692 }, { "epoch": 2.28, "grad_norm": 9.462034225463867, "learning_rate": 2.9524749713325694e-07, "loss": 0.3523, "step": 15693 }, { "epoch": 2.28, "grad_norm": 8.632031440734863, "learning_rate": 2.9513521187140746e-07, "loss": 0.3562, "step": 15694 }, { "epoch": 2.28, "grad_norm": 7.689953327178955, "learning_rate": 2.950229442687999e-07, "loss": 0.3659, "step": 15695 }, { "epoch": 2.28, "grad_norm": 8.958850860595703, "learning_rate": 2.949106943282471e-07, "loss": 0.3343, "step": 15696 }, { "epoch": 2.28, "grad_norm": 8.233062744140625, "learning_rate": 2.947984620525612e-07, "loss": 0.3457, "step": 15697 }, { "epoch": 2.28, "grad_norm": 8.646404266357422, "learning_rate": 2.9468624744455406e-07, "loss": 0.4057, "step": 15698 }, { "epoch": 2.28, "grad_norm": 8.412534713745117, "learning_rate": 2.945740505070369e-07, "loss": 0.347, "step": 15699 }, { "epoch": 2.28, "grad_norm": 8.5559663772583, "learning_rate": 2.9446187124282075e-07, "loss": 0.3602, "step": 15700 }, { "epoch": 2.28, "grad_norm": 10.036746978759766, "learning_rate": 2.943497096547163e-07, "loss": 0.3126, "step": 15701 }, { "epoch": 2.28, "grad_norm": 8.346080780029297, "learning_rate": 2.942375657455327e-07, "loss": 0.3243, "step": 15702 }, { "epoch": 2.28, "grad_norm": 7.948116302490234, "learning_rate": 2.941254395180808e-07, "loss": 0.371, "step": 15703 }, { "epoch": 2.28, "grad_norm": 9.030489921569824, "learning_rate": 2.940133309751683e-07, "loss": 0.3582, "step": 15704 }, { "epoch": 2.28, "grad_norm": 9.292425155639648, "learning_rate": 2.939012401196055e-07, "loss": 0.3796, "step": 15705 }, { "epoch": 2.28, "grad_norm": 8.572978973388672, "learning_rate": 2.937891669541992e-07, "loss": 0.3328, "step": 15706 }, { "epoch": 2.28, "grad_norm": 6.882840156555176, "learning_rate": 2.9367711148175843e-07, "loss": 0.3059, "step": 15707 }, { "epoch": 2.28, "grad_norm": 7.429742813110352, "learning_rate": 2.935650737050897e-07, "loss": 0.3263, "step": 15708 }, { "epoch": 2.28, "grad_norm": 8.974761962890625, "learning_rate": 2.9345305362700013e-07, "loss": 0.3007, "step": 15709 }, { "epoch": 2.28, "grad_norm": 7.901477336883545, "learning_rate": 2.933410512502964e-07, "loss": 0.37, "step": 15710 }, { "epoch": 2.28, "grad_norm": 10.024466514587402, "learning_rate": 2.9322906657778435e-07, "loss": 0.4368, "step": 15711 }, { "epoch": 2.28, "grad_norm": 9.08138370513916, "learning_rate": 2.931170996122696e-07, "loss": 0.3504, "step": 15712 }, { "epoch": 2.28, "grad_norm": 8.502138137817383, "learning_rate": 2.930051503565575e-07, "loss": 0.3262, "step": 15713 }, { "epoch": 2.28, "grad_norm": 8.717901229858398, "learning_rate": 2.9289321881345254e-07, "loss": 0.3566, "step": 15714 }, { "epoch": 2.28, "grad_norm": 9.600932121276855, "learning_rate": 2.92781304985759e-07, "loss": 0.4175, "step": 15715 }, { "epoch": 2.28, "grad_norm": 7.771026134490967, "learning_rate": 2.926694088762809e-07, "loss": 0.331, "step": 15716 }, { "epoch": 2.28, "grad_norm": 8.848403930664062, "learning_rate": 2.9255753048782126e-07, "loss": 0.3577, "step": 15717 }, { "epoch": 2.28, "grad_norm": 8.85793685913086, "learning_rate": 2.924456698231834e-07, "loss": 0.3709, "step": 15718 }, { "epoch": 2.28, "grad_norm": 7.894181728363037, "learning_rate": 2.9233382688516963e-07, "loss": 0.3221, "step": 15719 }, { "epoch": 2.28, "grad_norm": 7.596523761749268, "learning_rate": 2.922220016765818e-07, "loss": 0.3281, "step": 15720 }, { "epoch": 2.28, "grad_norm": 9.118660926818848, "learning_rate": 2.9211019420022186e-07, "loss": 0.4083, "step": 15721 }, { "epoch": 2.28, "grad_norm": 8.098231315612793, "learning_rate": 2.919984044588911e-07, "loss": 0.3238, "step": 15722 }, { "epoch": 2.28, "grad_norm": 7.800029754638672, "learning_rate": 2.918866324553894e-07, "loss": 0.3003, "step": 15723 }, { "epoch": 2.28, "grad_norm": 9.816554069519043, "learning_rate": 2.9177487819251803e-07, "loss": 0.2829, "step": 15724 }, { "epoch": 2.28, "grad_norm": 7.22283935546875, "learning_rate": 2.916631416730757e-07, "loss": 0.3151, "step": 15725 }, { "epoch": 2.28, "grad_norm": 9.061860084533691, "learning_rate": 2.9155142289986314e-07, "loss": 0.415, "step": 15726 }, { "epoch": 2.28, "grad_norm": 7.794870853424072, "learning_rate": 2.9143972187567803e-07, "loss": 0.3331, "step": 15727 }, { "epoch": 2.28, "grad_norm": 7.59182596206665, "learning_rate": 2.913280386033199e-07, "loss": 0.3543, "step": 15728 }, { "epoch": 2.28, "grad_norm": 8.218066215515137, "learning_rate": 2.91216373085586e-07, "loss": 0.3417, "step": 15729 }, { "epoch": 2.28, "grad_norm": 8.783541679382324, "learning_rate": 2.911047253252742e-07, "loss": 0.377, "step": 15730 }, { "epoch": 2.28, "grad_norm": 6.745778560638428, "learning_rate": 2.9099309532518166e-07, "loss": 0.2998, "step": 15731 }, { "epoch": 2.28, "grad_norm": 7.190971374511719, "learning_rate": 2.9088148308810514e-07, "loss": 0.3137, "step": 15732 }, { "epoch": 2.28, "grad_norm": 8.969128608703613, "learning_rate": 2.907698886168408e-07, "loss": 0.3265, "step": 15733 }, { "epoch": 2.28, "grad_norm": 9.726146697998047, "learning_rate": 2.9065831191418466e-07, "loss": 0.4501, "step": 15734 }, { "epoch": 2.28, "grad_norm": 9.088167190551758, "learning_rate": 2.9054675298293185e-07, "loss": 0.3413, "step": 15735 }, { "epoch": 2.28, "grad_norm": 8.916587829589844, "learning_rate": 2.904352118258775e-07, "loss": 0.314, "step": 15736 }, { "epoch": 2.28, "grad_norm": 8.031291961669922, "learning_rate": 2.903236884458159e-07, "loss": 0.3436, "step": 15737 }, { "epoch": 2.28, "grad_norm": 9.100247383117676, "learning_rate": 2.902121828455415e-07, "loss": 0.3334, "step": 15738 }, { "epoch": 2.28, "grad_norm": 9.548150062561035, "learning_rate": 2.9010069502784697e-07, "loss": 0.386, "step": 15739 }, { "epoch": 2.28, "grad_norm": 7.6333842277526855, "learning_rate": 2.8998922499552667e-07, "loss": 0.3299, "step": 15740 }, { "epoch": 2.28, "grad_norm": 8.187947273254395, "learning_rate": 2.8987777275137213e-07, "loss": 0.346, "step": 15741 }, { "epoch": 2.28, "grad_norm": 7.946376323699951, "learning_rate": 2.8976633829817644e-07, "loss": 0.3394, "step": 15742 }, { "epoch": 2.28, "grad_norm": 9.727848052978516, "learning_rate": 2.896549216387314e-07, "loss": 0.3632, "step": 15743 }, { "epoch": 2.28, "grad_norm": 8.206131935119629, "learning_rate": 2.895435227758276e-07, "loss": 0.3374, "step": 15744 }, { "epoch": 2.28, "grad_norm": 8.784324645996094, "learning_rate": 2.8943214171225693e-07, "loss": 0.3097, "step": 15745 }, { "epoch": 2.28, "grad_norm": 8.265180587768555, "learning_rate": 2.8932077845080895e-07, "loss": 0.3104, "step": 15746 }, { "epoch": 2.28, "grad_norm": 7.47025728225708, "learning_rate": 2.892094329942746e-07, "loss": 0.3314, "step": 15747 }, { "epoch": 2.28, "grad_norm": 8.639408111572266, "learning_rate": 2.8909810534544277e-07, "loss": 0.3559, "step": 15748 }, { "epoch": 2.29, "grad_norm": 8.269460678100586, "learning_rate": 2.889867955071028e-07, "loss": 0.3012, "step": 15749 }, { "epoch": 2.29, "grad_norm": 8.415939331054688, "learning_rate": 2.8887550348204335e-07, "loss": 0.3649, "step": 15750 }, { "epoch": 2.29, "grad_norm": 9.096699714660645, "learning_rate": 2.8876422927305276e-07, "loss": 0.3466, "step": 15751 }, { "epoch": 2.29, "grad_norm": 7.802103042602539, "learning_rate": 2.886529728829188e-07, "loss": 0.3067, "step": 15752 }, { "epoch": 2.29, "grad_norm": 8.51968765258789, "learning_rate": 2.8854173431442875e-07, "loss": 0.3354, "step": 15753 }, { "epoch": 2.29, "grad_norm": 8.375554084777832, "learning_rate": 2.884305135703695e-07, "loss": 0.3458, "step": 15754 }, { "epoch": 2.29, "grad_norm": 8.741117477416992, "learning_rate": 2.8831931065352765e-07, "loss": 0.3539, "step": 15755 }, { "epoch": 2.29, "grad_norm": 8.629409790039062, "learning_rate": 2.8820812556668906e-07, "loss": 0.3852, "step": 15756 }, { "epoch": 2.29, "grad_norm": 7.6813507080078125, "learning_rate": 2.8809695831263957e-07, "loss": 0.3161, "step": 15757 }, { "epoch": 2.29, "grad_norm": 7.5459303855896, "learning_rate": 2.8798580889416357e-07, "loss": 0.3138, "step": 15758 }, { "epoch": 2.29, "grad_norm": 7.8380022048950195, "learning_rate": 2.878746773140468e-07, "loss": 0.3283, "step": 15759 }, { "epoch": 2.29, "grad_norm": 9.061274528503418, "learning_rate": 2.8776356357507236e-07, "loss": 0.4133, "step": 15760 }, { "epoch": 2.29, "grad_norm": 9.746857643127441, "learning_rate": 2.8765246768002506e-07, "loss": 0.3736, "step": 15761 }, { "epoch": 2.29, "grad_norm": 8.581453323364258, "learning_rate": 2.8754138963168716e-07, "loss": 0.3958, "step": 15762 }, { "epoch": 2.29, "grad_norm": 9.798126220703125, "learning_rate": 2.8743032943284284e-07, "loss": 0.3782, "step": 15763 }, { "epoch": 2.29, "grad_norm": 8.439106941223145, "learning_rate": 2.873192870862734e-07, "loss": 0.3596, "step": 15764 }, { "epoch": 2.29, "grad_norm": 8.320805549621582, "learning_rate": 2.8720826259476127e-07, "loss": 0.335, "step": 15765 }, { "epoch": 2.29, "grad_norm": 9.40099048614502, "learning_rate": 2.870972559610879e-07, "loss": 0.4084, "step": 15766 }, { "epoch": 2.29, "grad_norm": 7.936568260192871, "learning_rate": 2.869862671880342e-07, "loss": 0.3052, "step": 15767 }, { "epoch": 2.29, "grad_norm": 9.171090126037598, "learning_rate": 2.868752962783817e-07, "loss": 0.3647, "step": 15768 }, { "epoch": 2.29, "grad_norm": 7.744574546813965, "learning_rate": 2.867643432349095e-07, "loss": 0.3077, "step": 15769 }, { "epoch": 2.29, "grad_norm": 8.953665733337402, "learning_rate": 2.8665340806039797e-07, "loss": 0.2992, "step": 15770 }, { "epoch": 2.29, "grad_norm": 8.658878326416016, "learning_rate": 2.8654249075762613e-07, "loss": 0.3396, "step": 15771 }, { "epoch": 2.29, "grad_norm": 8.778427124023438, "learning_rate": 2.864315913293729e-07, "loss": 0.3059, "step": 15772 }, { "epoch": 2.29, "grad_norm": 7.9741291999816895, "learning_rate": 2.8632070977841684e-07, "loss": 0.338, "step": 15773 }, { "epoch": 2.29, "grad_norm": 9.83716869354248, "learning_rate": 2.8620984610753564e-07, "loss": 0.4179, "step": 15774 }, { "epoch": 2.29, "grad_norm": 8.214204788208008, "learning_rate": 2.8609900031950705e-07, "loss": 0.3381, "step": 15775 }, { "epoch": 2.29, "grad_norm": 7.815609931945801, "learning_rate": 2.8598817241710806e-07, "loss": 0.331, "step": 15776 }, { "epoch": 2.29, "grad_norm": 8.260290145874023, "learning_rate": 2.8587736240311524e-07, "loss": 0.3459, "step": 15777 }, { "epoch": 2.29, "grad_norm": 8.248766899108887, "learning_rate": 2.8576657028030506e-07, "loss": 0.3489, "step": 15778 }, { "epoch": 2.29, "grad_norm": 7.180727005004883, "learning_rate": 2.856557960514524e-07, "loss": 0.326, "step": 15779 }, { "epoch": 2.29, "grad_norm": 7.632447242736816, "learning_rate": 2.8554503971933364e-07, "loss": 0.3057, "step": 15780 }, { "epoch": 2.29, "grad_norm": 7.960522174835205, "learning_rate": 2.854343012867224e-07, "loss": 0.3423, "step": 15781 }, { "epoch": 2.29, "grad_norm": 7.588595867156982, "learning_rate": 2.853235807563944e-07, "loss": 0.3691, "step": 15782 }, { "epoch": 2.29, "grad_norm": 9.40050220489502, "learning_rate": 2.852128781311225e-07, "loss": 0.3677, "step": 15783 }, { "epoch": 2.29, "grad_norm": 8.220617294311523, "learning_rate": 2.8510219341368046e-07, "loss": 0.3339, "step": 15784 }, { "epoch": 2.29, "grad_norm": 8.755074501037598, "learning_rate": 2.8499152660684155e-07, "loss": 0.4036, "step": 15785 }, { "epoch": 2.29, "grad_norm": 8.135869979858398, "learning_rate": 2.84880877713378e-07, "loss": 0.3496, "step": 15786 }, { "epoch": 2.29, "grad_norm": 7.693538188934326, "learning_rate": 2.8477024673606223e-07, "loss": 0.3245, "step": 15787 }, { "epoch": 2.29, "grad_norm": 8.741584777832031, "learning_rate": 2.8465963367766575e-07, "loss": 0.4214, "step": 15788 }, { "epoch": 2.29, "grad_norm": 9.200831413269043, "learning_rate": 2.845490385409598e-07, "loss": 0.3821, "step": 15789 }, { "epoch": 2.29, "grad_norm": 7.556762218475342, "learning_rate": 2.844384613287153e-07, "loss": 0.3491, "step": 15790 }, { "epoch": 2.29, "grad_norm": 9.173259735107422, "learning_rate": 2.8432790204370247e-07, "loss": 0.3736, "step": 15791 }, { "epoch": 2.29, "grad_norm": 7.807951927185059, "learning_rate": 2.842173606886915e-07, "loss": 0.332, "step": 15792 }, { "epoch": 2.29, "grad_norm": 8.374691009521484, "learning_rate": 2.841068372664509e-07, "loss": 0.3486, "step": 15793 }, { "epoch": 2.29, "grad_norm": 8.482172966003418, "learning_rate": 2.8399633177975056e-07, "loss": 0.3144, "step": 15794 }, { "epoch": 2.29, "grad_norm": 7.742213249206543, "learning_rate": 2.8388584423135873e-07, "loss": 0.3786, "step": 15795 }, { "epoch": 2.29, "grad_norm": 8.295493125915527, "learning_rate": 2.8377537462404365e-07, "loss": 0.3462, "step": 15796 }, { "epoch": 2.29, "grad_norm": 8.241083145141602, "learning_rate": 2.8366492296057274e-07, "loss": 0.3738, "step": 15797 }, { "epoch": 2.29, "grad_norm": 8.648326873779297, "learning_rate": 2.835544892437133e-07, "loss": 0.3036, "step": 15798 }, { "epoch": 2.29, "grad_norm": 7.806292533874512, "learning_rate": 2.834440734762322e-07, "loss": 0.3474, "step": 15799 }, { "epoch": 2.29, "grad_norm": 8.438838005065918, "learning_rate": 2.8333367566089505e-07, "loss": 0.3084, "step": 15800 }, { "epoch": 2.29, "grad_norm": 8.434365272521973, "learning_rate": 2.832232958004688e-07, "loss": 0.3411, "step": 15801 }, { "epoch": 2.29, "grad_norm": 7.65503454208374, "learning_rate": 2.831129338977176e-07, "loss": 0.3399, "step": 15802 }, { "epoch": 2.29, "grad_norm": 7.230340957641602, "learning_rate": 2.830025899554077e-07, "loss": 0.2967, "step": 15803 }, { "epoch": 2.29, "grad_norm": 9.849778175354004, "learning_rate": 2.8289226397630253e-07, "loss": 0.4444, "step": 15804 }, { "epoch": 2.29, "grad_norm": 9.717540740966797, "learning_rate": 2.8278195596316656e-07, "loss": 0.3751, "step": 15805 }, { "epoch": 2.29, "grad_norm": 8.513652801513672, "learning_rate": 2.826716659187632e-07, "loss": 0.3356, "step": 15806 }, { "epoch": 2.29, "grad_norm": 10.450145721435547, "learning_rate": 2.8256139384585587e-07, "loss": 0.4041, "step": 15807 }, { "epoch": 2.29, "grad_norm": 8.38573932647705, "learning_rate": 2.8245113974720704e-07, "loss": 0.3644, "step": 15808 }, { "epoch": 2.29, "grad_norm": 8.510721206665039, "learning_rate": 2.8234090362557906e-07, "loss": 0.3457, "step": 15809 }, { "epoch": 2.29, "grad_norm": 9.328201293945312, "learning_rate": 2.8223068548373364e-07, "loss": 0.3844, "step": 15810 }, { "epoch": 2.29, "grad_norm": 9.804835319519043, "learning_rate": 2.821204853244321e-07, "loss": 0.3881, "step": 15811 }, { "epoch": 2.29, "grad_norm": 8.379133224487305, "learning_rate": 2.820103031504354e-07, "loss": 0.3348, "step": 15812 }, { "epoch": 2.29, "grad_norm": 7.874580383300781, "learning_rate": 2.8190013896450427e-07, "loss": 0.3399, "step": 15813 }, { "epoch": 2.29, "grad_norm": 9.122937202453613, "learning_rate": 2.8178999276939773e-07, "loss": 0.324, "step": 15814 }, { "epoch": 2.29, "grad_norm": 9.461217880249023, "learning_rate": 2.816798645678765e-07, "loss": 0.3604, "step": 15815 }, { "epoch": 2.29, "grad_norm": 8.88895320892334, "learning_rate": 2.8156975436269856e-07, "loss": 0.4148, "step": 15816 }, { "epoch": 2.29, "grad_norm": 9.29474925994873, "learning_rate": 2.814596621566238e-07, "loss": 0.3203, "step": 15817 }, { "epoch": 2.3, "grad_norm": 9.41740894317627, "learning_rate": 2.8134958795240893e-07, "loss": 0.3805, "step": 15818 }, { "epoch": 2.3, "grad_norm": 6.9606523513793945, "learning_rate": 2.812395317528129e-07, "loss": 0.3101, "step": 15819 }, { "epoch": 2.3, "grad_norm": 9.511726379394531, "learning_rate": 2.811294935605928e-07, "loss": 0.3989, "step": 15820 }, { "epoch": 2.3, "grad_norm": 9.677879333496094, "learning_rate": 2.810194733785045e-07, "loss": 0.3809, "step": 15821 }, { "epoch": 2.3, "grad_norm": 8.546684265136719, "learning_rate": 2.8090947120930574e-07, "loss": 0.3792, "step": 15822 }, { "epoch": 2.3, "grad_norm": 8.311877250671387, "learning_rate": 2.807994870557512e-07, "loss": 0.2924, "step": 15823 }, { "epoch": 2.3, "grad_norm": 7.505640983581543, "learning_rate": 2.806895209205975e-07, "loss": 0.3456, "step": 15824 }, { "epoch": 2.3, "grad_norm": 8.229254722595215, "learning_rate": 2.8057957280659885e-07, "loss": 0.3509, "step": 15825 }, { "epoch": 2.3, "grad_norm": 8.623119354248047, "learning_rate": 2.8046964271651e-07, "loss": 0.365, "step": 15826 }, { "epoch": 2.3, "grad_norm": 9.09416389465332, "learning_rate": 2.8035973065308514e-07, "loss": 0.3514, "step": 15827 }, { "epoch": 2.3, "grad_norm": 7.452428817749023, "learning_rate": 2.80249836619078e-07, "loss": 0.3322, "step": 15828 }, { "epoch": 2.3, "grad_norm": 8.470375061035156, "learning_rate": 2.801399606172418e-07, "loss": 0.3506, "step": 15829 }, { "epoch": 2.3, "grad_norm": 8.925299644470215, "learning_rate": 2.8003010265032903e-07, "loss": 0.287, "step": 15830 }, { "epoch": 2.3, "grad_norm": 8.748433113098145, "learning_rate": 2.7992026272109236e-07, "loss": 0.3554, "step": 15831 }, { "epoch": 2.3, "grad_norm": 8.193862915039062, "learning_rate": 2.7981044083228343e-07, "loss": 0.3485, "step": 15832 }, { "epoch": 2.3, "grad_norm": 8.602228164672852, "learning_rate": 2.797006369866537e-07, "loss": 0.389, "step": 15833 }, { "epoch": 2.3, "grad_norm": 7.952898025512695, "learning_rate": 2.7959085118695457e-07, "loss": 0.3249, "step": 15834 }, { "epoch": 2.3, "grad_norm": 8.588133811950684, "learning_rate": 2.794810834359355e-07, "loss": 0.3546, "step": 15835 }, { "epoch": 2.3, "grad_norm": 8.281170845031738, "learning_rate": 2.7937133373634756e-07, "loss": 0.3151, "step": 15836 }, { "epoch": 2.3, "grad_norm": 9.309525489807129, "learning_rate": 2.7926160209093954e-07, "loss": 0.398, "step": 15837 }, { "epoch": 2.3, "grad_norm": 7.6726531982421875, "learning_rate": 2.791518885024616e-07, "loss": 0.3126, "step": 15838 }, { "epoch": 2.3, "grad_norm": 8.517293930053711, "learning_rate": 2.790421929736615e-07, "loss": 0.3213, "step": 15839 }, { "epoch": 2.3, "grad_norm": 7.704545021057129, "learning_rate": 2.789325155072878e-07, "loss": 0.3921, "step": 15840 }, { "epoch": 2.3, "grad_norm": 9.074429512023926, "learning_rate": 2.788228561060884e-07, "loss": 0.3553, "step": 15841 }, { "epoch": 2.3, "grad_norm": 7.743269443511963, "learning_rate": 2.787132147728105e-07, "loss": 0.3545, "step": 15842 }, { "epoch": 2.3, "grad_norm": 8.8330078125, "learning_rate": 2.7860359151020095e-07, "loss": 0.3157, "step": 15843 }, { "epoch": 2.3, "grad_norm": 9.383448600769043, "learning_rate": 2.784939863210064e-07, "loss": 0.3742, "step": 15844 }, { "epoch": 2.3, "grad_norm": 7.776323318481445, "learning_rate": 2.7838439920797264e-07, "loss": 0.3672, "step": 15845 }, { "epoch": 2.3, "grad_norm": 8.89078426361084, "learning_rate": 2.782748301738452e-07, "loss": 0.3856, "step": 15846 }, { "epoch": 2.3, "grad_norm": 9.060890197753906, "learning_rate": 2.7816527922136934e-07, "loss": 0.3357, "step": 15847 }, { "epoch": 2.3, "grad_norm": 9.060068130493164, "learning_rate": 2.780557463532894e-07, "loss": 0.3209, "step": 15848 }, { "epoch": 2.3, "grad_norm": 8.880182266235352, "learning_rate": 2.7794623157234986e-07, "loss": 0.3937, "step": 15849 }, { "epoch": 2.3, "grad_norm": 7.988264560699463, "learning_rate": 2.778367348812941e-07, "loss": 0.3427, "step": 15850 }, { "epoch": 2.3, "grad_norm": 8.515721321105957, "learning_rate": 2.7772725628286574e-07, "loss": 0.387, "step": 15851 }, { "epoch": 2.3, "grad_norm": 9.033493041992188, "learning_rate": 2.7761779577980724e-07, "loss": 0.3711, "step": 15852 }, { "epoch": 2.3, "grad_norm": 8.304866790771484, "learning_rate": 2.775083533748612e-07, "loss": 0.3262, "step": 15853 }, { "epoch": 2.3, "grad_norm": 8.241534233093262, "learning_rate": 2.7739892907076946e-07, "loss": 0.361, "step": 15854 }, { "epoch": 2.3, "grad_norm": 7.8621439933776855, "learning_rate": 2.7728952287027376e-07, "loss": 0.3328, "step": 15855 }, { "epoch": 2.3, "grad_norm": 8.836315155029297, "learning_rate": 2.771801347761141e-07, "loss": 0.3284, "step": 15856 }, { "epoch": 2.3, "grad_norm": 7.8530964851379395, "learning_rate": 2.770707647910324e-07, "loss": 0.3227, "step": 15857 }, { "epoch": 2.3, "grad_norm": 8.200413703918457, "learning_rate": 2.769614129177673e-07, "loss": 0.3726, "step": 15858 }, { "epoch": 2.3, "grad_norm": 7.187547206878662, "learning_rate": 2.7685207915905994e-07, "loss": 0.3521, "step": 15859 }, { "epoch": 2.3, "grad_norm": 7.712226390838623, "learning_rate": 2.7674276351764835e-07, "loss": 0.3312, "step": 15860 }, { "epoch": 2.3, "grad_norm": 9.528263092041016, "learning_rate": 2.766334659962718e-07, "loss": 0.4029, "step": 15861 }, { "epoch": 2.3, "grad_norm": 9.191197395324707, "learning_rate": 2.765241865976683e-07, "loss": 0.3605, "step": 15862 }, { "epoch": 2.3, "grad_norm": 9.315832138061523, "learning_rate": 2.7641492532457577e-07, "loss": 0.3643, "step": 15863 }, { "epoch": 2.3, "grad_norm": 10.774186134338379, "learning_rate": 2.7630568217973163e-07, "loss": 0.4284, "step": 15864 }, { "epoch": 2.3, "grad_norm": 8.366059303283691, "learning_rate": 2.7619645716587267e-07, "loss": 0.3934, "step": 15865 }, { "epoch": 2.3, "grad_norm": 8.068426132202148, "learning_rate": 2.760872502857354e-07, "loss": 0.3737, "step": 15866 }, { "epoch": 2.3, "grad_norm": 9.29906940460205, "learning_rate": 2.7597806154205595e-07, "loss": 0.3786, "step": 15867 }, { "epoch": 2.3, "grad_norm": 9.001420974731445, "learning_rate": 2.758688909375697e-07, "loss": 0.3204, "step": 15868 }, { "epoch": 2.3, "grad_norm": 8.718722343444824, "learning_rate": 2.757597384750121e-07, "loss": 0.3328, "step": 15869 }, { "epoch": 2.3, "grad_norm": 8.833667755126953, "learning_rate": 2.756506041571169e-07, "loss": 0.3184, "step": 15870 }, { "epoch": 2.3, "grad_norm": 7.625581741333008, "learning_rate": 2.7554148798661914e-07, "loss": 0.3627, "step": 15871 }, { "epoch": 2.3, "grad_norm": 9.287434577941895, "learning_rate": 2.7543238996625226e-07, "loss": 0.3342, "step": 15872 }, { "epoch": 2.3, "grad_norm": 8.354820251464844, "learning_rate": 2.7532331009874964e-07, "loss": 0.3464, "step": 15873 }, { "epoch": 2.3, "grad_norm": 9.178847312927246, "learning_rate": 2.752142483868439e-07, "loss": 0.4003, "step": 15874 }, { "epoch": 2.3, "grad_norm": 9.64892292022705, "learning_rate": 2.751052048332676e-07, "loss": 0.346, "step": 15875 }, { "epoch": 2.3, "grad_norm": 8.673805236816406, "learning_rate": 2.749961794407528e-07, "loss": 0.3183, "step": 15876 }, { "epoch": 2.3, "grad_norm": 7.429399013519287, "learning_rate": 2.748871722120302e-07, "loss": 0.3375, "step": 15877 }, { "epoch": 2.3, "grad_norm": 8.936037063598633, "learning_rate": 2.747781831498319e-07, "loss": 0.3582, "step": 15878 }, { "epoch": 2.3, "grad_norm": 8.607379913330078, "learning_rate": 2.746692122568872e-07, "loss": 0.3655, "step": 15879 }, { "epoch": 2.3, "grad_norm": 10.115154266357422, "learning_rate": 2.745602595359274e-07, "loss": 0.4127, "step": 15880 }, { "epoch": 2.3, "grad_norm": 9.066469192504883, "learning_rate": 2.7445132498968135e-07, "loss": 0.3621, "step": 15881 }, { "epoch": 2.3, "grad_norm": 8.222122192382812, "learning_rate": 2.7434240862087855e-07, "loss": 0.4106, "step": 15882 }, { "epoch": 2.3, "grad_norm": 9.095548629760742, "learning_rate": 2.742335104322475e-07, "loss": 0.353, "step": 15883 }, { "epoch": 2.3, "grad_norm": 8.26669692993164, "learning_rate": 2.741246304265167e-07, "loss": 0.3585, "step": 15884 }, { "epoch": 2.3, "grad_norm": 8.239079475402832, "learning_rate": 2.740157686064138e-07, "loss": 0.3104, "step": 15885 }, { "epoch": 2.3, "grad_norm": 8.400876998901367, "learning_rate": 2.7390692497466616e-07, "loss": 0.3328, "step": 15886 }, { "epoch": 2.31, "grad_norm": 8.608294486999512, "learning_rate": 2.737980995340009e-07, "loss": 0.3638, "step": 15887 }, { "epoch": 2.31, "grad_norm": 9.882271766662598, "learning_rate": 2.736892922871442e-07, "loss": 0.3691, "step": 15888 }, { "epoch": 2.31, "grad_norm": 10.023859024047852, "learning_rate": 2.735805032368221e-07, "loss": 0.3526, "step": 15889 }, { "epoch": 2.31, "grad_norm": 9.172473907470703, "learning_rate": 2.734717323857606e-07, "loss": 0.338, "step": 15890 }, { "epoch": 2.31, "grad_norm": 7.777688026428223, "learning_rate": 2.733629797366836e-07, "loss": 0.3357, "step": 15891 }, { "epoch": 2.31, "grad_norm": 8.227275848388672, "learning_rate": 2.7325424529231725e-07, "loss": 0.372, "step": 15892 }, { "epoch": 2.31, "grad_norm": 8.008692741394043, "learning_rate": 2.731455290553842e-07, "loss": 0.3553, "step": 15893 }, { "epoch": 2.31, "grad_norm": 9.031139373779297, "learning_rate": 2.7303683102860966e-07, "loss": 0.3925, "step": 15894 }, { "epoch": 2.31, "grad_norm": 7.8216471672058105, "learning_rate": 2.7292815121471577e-07, "loss": 0.3125, "step": 15895 }, { "epoch": 2.31, "grad_norm": 8.21021842956543, "learning_rate": 2.7281948961642566e-07, "loss": 0.3432, "step": 15896 }, { "epoch": 2.31, "grad_norm": 8.58335018157959, "learning_rate": 2.727108462364617e-07, "loss": 0.3826, "step": 15897 }, { "epoch": 2.31, "grad_norm": 7.904296398162842, "learning_rate": 2.726022210775455e-07, "loss": 0.3251, "step": 15898 }, { "epoch": 2.31, "grad_norm": 9.581744194030762, "learning_rate": 2.724936141423992e-07, "loss": 0.3615, "step": 15899 }, { "epoch": 2.31, "grad_norm": 8.309903144836426, "learning_rate": 2.723850254337429e-07, "loss": 0.3836, "step": 15900 }, { "epoch": 2.31, "grad_norm": 9.382285118103027, "learning_rate": 2.722764549542981e-07, "loss": 0.3686, "step": 15901 }, { "epoch": 2.31, "grad_norm": 8.163189888000488, "learning_rate": 2.7216790270678403e-07, "loss": 0.3188, "step": 15902 }, { "epoch": 2.31, "grad_norm": 9.940444946289062, "learning_rate": 2.720593686939204e-07, "loss": 0.4083, "step": 15903 }, { "epoch": 2.31, "grad_norm": 7.440356731414795, "learning_rate": 2.7195085291842667e-07, "loss": 0.3259, "step": 15904 }, { "epoch": 2.31, "grad_norm": 8.51828384399414, "learning_rate": 2.7184235538302126e-07, "loss": 0.3963, "step": 15905 }, { "epoch": 2.31, "grad_norm": 9.03316593170166, "learning_rate": 2.7173387609042256e-07, "loss": 0.322, "step": 15906 }, { "epoch": 2.31, "grad_norm": 8.649327278137207, "learning_rate": 2.7162541504334823e-07, "loss": 0.4181, "step": 15907 }, { "epoch": 2.31, "grad_norm": 6.8256120681762695, "learning_rate": 2.7151697224451577e-07, "loss": 0.3178, "step": 15908 }, { "epoch": 2.31, "grad_norm": 8.017770767211914, "learning_rate": 2.714085476966418e-07, "loss": 0.3334, "step": 15909 }, { "epoch": 2.31, "grad_norm": 8.788728713989258, "learning_rate": 2.7130014140244286e-07, "loss": 0.3531, "step": 15910 }, { "epoch": 2.31, "grad_norm": 8.069719314575195, "learning_rate": 2.711917533646352e-07, "loss": 0.3033, "step": 15911 }, { "epoch": 2.31, "grad_norm": 8.705878257751465, "learning_rate": 2.710833835859333e-07, "loss": 0.3749, "step": 15912 }, { "epoch": 2.31, "grad_norm": 10.956557273864746, "learning_rate": 2.709750320690535e-07, "loss": 0.3867, "step": 15913 }, { "epoch": 2.31, "grad_norm": 10.01198959350586, "learning_rate": 2.708666988167091e-07, "loss": 0.4228, "step": 15914 }, { "epoch": 2.31, "grad_norm": 7.1138458251953125, "learning_rate": 2.7075838383161545e-07, "loss": 0.271, "step": 15915 }, { "epoch": 2.31, "grad_norm": 7.823349952697754, "learning_rate": 2.706500871164853e-07, "loss": 0.3338, "step": 15916 }, { "epoch": 2.31, "grad_norm": 8.330471992492676, "learning_rate": 2.705418086740322e-07, "loss": 0.3561, "step": 15917 }, { "epoch": 2.31, "grad_norm": 8.449233055114746, "learning_rate": 2.704335485069688e-07, "loss": 0.3564, "step": 15918 }, { "epoch": 2.31, "grad_norm": 10.253442764282227, "learning_rate": 2.7032530661800745e-07, "loss": 0.3999, "step": 15919 }, { "epoch": 2.31, "grad_norm": 9.321895599365234, "learning_rate": 2.702170830098599e-07, "loss": 0.404, "step": 15920 }, { "epoch": 2.31, "grad_norm": 8.253888130187988, "learning_rate": 2.701088776852376e-07, "loss": 0.349, "step": 15921 }, { "epoch": 2.31, "grad_norm": 8.054718017578125, "learning_rate": 2.700006906468514e-07, "loss": 0.3066, "step": 15922 }, { "epoch": 2.31, "grad_norm": 8.14187240600586, "learning_rate": 2.6989252189741174e-07, "loss": 0.332, "step": 15923 }, { "epoch": 2.31, "grad_norm": 7.956262588500977, "learning_rate": 2.6978437143962873e-07, "loss": 0.3574, "step": 15924 }, { "epoch": 2.31, "grad_norm": 10.710099220275879, "learning_rate": 2.6967623927621176e-07, "loss": 0.4121, "step": 15925 }, { "epoch": 2.31, "grad_norm": 8.167216300964355, "learning_rate": 2.6956812540986995e-07, "loss": 0.3597, "step": 15926 }, { "epoch": 2.31, "grad_norm": 7.975152492523193, "learning_rate": 2.6946002984331196e-07, "loss": 0.3386, "step": 15927 }, { "epoch": 2.31, "grad_norm": 8.661168098449707, "learning_rate": 2.693519525792459e-07, "loss": 0.3374, "step": 15928 }, { "epoch": 2.31, "grad_norm": 8.29003620147705, "learning_rate": 2.6924389362037946e-07, "loss": 0.3611, "step": 15929 }, { "epoch": 2.31, "grad_norm": 8.473862648010254, "learning_rate": 2.6913585296942034e-07, "loss": 0.3589, "step": 15930 }, { "epoch": 2.31, "grad_norm": 8.422140121459961, "learning_rate": 2.690278306290742e-07, "loss": 0.4005, "step": 15931 }, { "epoch": 2.31, "grad_norm": 8.914494514465332, "learning_rate": 2.689198266020486e-07, "loss": 0.4104, "step": 15932 }, { "epoch": 2.31, "grad_norm": 7.830766677856445, "learning_rate": 2.6881184089104837e-07, "loss": 0.3123, "step": 15933 }, { "epoch": 2.31, "grad_norm": 9.126907348632812, "learning_rate": 2.6870387349878e-07, "loss": 0.3939, "step": 15934 }, { "epoch": 2.31, "grad_norm": 7.867440223693848, "learning_rate": 2.685959244279472e-07, "loss": 0.3327, "step": 15935 }, { "epoch": 2.31, "grad_norm": 7.324036598205566, "learning_rate": 2.6848799368125575e-07, "loss": 0.3499, "step": 15936 }, { "epoch": 2.31, "grad_norm": 9.330164909362793, "learning_rate": 2.6838008126140866e-07, "loss": 0.4029, "step": 15937 }, { "epoch": 2.31, "grad_norm": 9.620905876159668, "learning_rate": 2.6827218717111e-07, "loss": 0.3881, "step": 15938 }, { "epoch": 2.31, "grad_norm": 8.491043090820312, "learning_rate": 2.681643114130626e-07, "loss": 0.3995, "step": 15939 }, { "epoch": 2.31, "grad_norm": 7.820701599121094, "learning_rate": 2.6805645398996947e-07, "loss": 0.3025, "step": 15940 }, { "epoch": 2.31, "grad_norm": 8.853549003601074, "learning_rate": 2.6794861490453247e-07, "loss": 0.3924, "step": 15941 }, { "epoch": 2.31, "grad_norm": 7.670133590698242, "learning_rate": 2.678407941594535e-07, "loss": 0.3146, "step": 15942 }, { "epoch": 2.31, "grad_norm": 8.969123840332031, "learning_rate": 2.6773299175743367e-07, "loss": 0.3848, "step": 15943 }, { "epoch": 2.31, "grad_norm": 10.362717628479004, "learning_rate": 2.6762520770117414e-07, "loss": 0.401, "step": 15944 }, { "epoch": 2.31, "grad_norm": 7.9690470695495605, "learning_rate": 2.6751744199337487e-07, "loss": 0.3458, "step": 15945 }, { "epoch": 2.31, "grad_norm": 8.367646217346191, "learning_rate": 2.6740969463673636e-07, "loss": 0.3376, "step": 15946 }, { "epoch": 2.31, "grad_norm": 9.1550931930542, "learning_rate": 2.673019656339569e-07, "loss": 0.4023, "step": 15947 }, { "epoch": 2.31, "grad_norm": 8.106674194335938, "learning_rate": 2.6719425498773684e-07, "loss": 0.3623, "step": 15948 }, { "epoch": 2.31, "grad_norm": 7.733614444732666, "learning_rate": 2.6708656270077345e-07, "loss": 0.3076, "step": 15949 }, { "epoch": 2.31, "grad_norm": 11.244384765625, "learning_rate": 2.669788887757657e-07, "loss": 0.4728, "step": 15950 }, { "epoch": 2.31, "grad_norm": 8.849361419677734, "learning_rate": 2.6687123321541105e-07, "loss": 0.3834, "step": 15951 }, { "epoch": 2.31, "grad_norm": 9.484513282775879, "learning_rate": 2.6676359602240594e-07, "loss": 0.3815, "step": 15952 }, { "epoch": 2.31, "grad_norm": 8.958874702453613, "learning_rate": 2.666559771994481e-07, "loss": 0.3602, "step": 15953 }, { "epoch": 2.31, "grad_norm": 8.247401237487793, "learning_rate": 2.6654837674923267e-07, "loss": 0.3456, "step": 15954 }, { "epoch": 2.32, "grad_norm": 8.666733741760254, "learning_rate": 2.6644079467445646e-07, "loss": 0.3136, "step": 15955 }, { "epoch": 2.32, "grad_norm": 9.617390632629395, "learning_rate": 2.6633323097781366e-07, "loss": 0.4013, "step": 15956 }, { "epoch": 2.32, "grad_norm": 8.657027244567871, "learning_rate": 2.6622568566200023e-07, "loss": 0.3339, "step": 15957 }, { "epoch": 2.32, "grad_norm": 7.9604387283325195, "learning_rate": 2.6611815872970977e-07, "loss": 0.3186, "step": 15958 }, { "epoch": 2.32, "grad_norm": 8.248343467712402, "learning_rate": 2.6601065018363633e-07, "loss": 0.3473, "step": 15959 }, { "epoch": 2.32, "grad_norm": 9.100961685180664, "learning_rate": 2.6590316002647337e-07, "loss": 0.3766, "step": 15960 }, { "epoch": 2.32, "grad_norm": 8.288080215454102, "learning_rate": 2.6579568826091414e-07, "loss": 0.3773, "step": 15961 }, { "epoch": 2.32, "grad_norm": 7.263527870178223, "learning_rate": 2.6568823488965087e-07, "loss": 0.2865, "step": 15962 }, { "epoch": 2.32, "grad_norm": 9.68459701538086, "learning_rate": 2.6558079991537576e-07, "loss": 0.403, "step": 15963 }, { "epoch": 2.32, "grad_norm": 8.441669464111328, "learning_rate": 2.654733833407804e-07, "loss": 0.3197, "step": 15964 }, { "epoch": 2.32, "grad_norm": 9.132904052734375, "learning_rate": 2.65365985168556e-07, "loss": 0.3783, "step": 15965 }, { "epoch": 2.32, "grad_norm": 8.820960998535156, "learning_rate": 2.652586054013931e-07, "loss": 0.3187, "step": 15966 }, { "epoch": 2.32, "grad_norm": 9.768040657043457, "learning_rate": 2.651512440419824e-07, "loss": 0.3373, "step": 15967 }, { "epoch": 2.32, "grad_norm": 8.329031944274902, "learning_rate": 2.6504390109301254e-07, "loss": 0.352, "step": 15968 }, { "epoch": 2.32, "grad_norm": 8.76313304901123, "learning_rate": 2.6493657655717426e-07, "loss": 0.416, "step": 15969 }, { "epoch": 2.32, "grad_norm": 9.265643119812012, "learning_rate": 2.648292704371551e-07, "loss": 0.4163, "step": 15970 }, { "epoch": 2.32, "grad_norm": 7.893856525421143, "learning_rate": 2.647219827356446e-07, "loss": 0.3296, "step": 15971 }, { "epoch": 2.32, "grad_norm": 9.03403091430664, "learning_rate": 2.6461471345532995e-07, "loss": 0.355, "step": 15972 }, { "epoch": 2.32, "grad_norm": 8.342901229858398, "learning_rate": 2.6450746259889865e-07, "loss": 0.3768, "step": 15973 }, { "epoch": 2.32, "grad_norm": 8.158838272094727, "learning_rate": 2.644002301690379e-07, "loss": 0.3317, "step": 15974 }, { "epoch": 2.32, "grad_norm": 7.167026996612549, "learning_rate": 2.64293016168434e-07, "loss": 0.328, "step": 15975 }, { "epoch": 2.32, "grad_norm": 10.235776901245117, "learning_rate": 2.6418582059977335e-07, "loss": 0.3938, "step": 15976 }, { "epoch": 2.32, "grad_norm": 7.509772300720215, "learning_rate": 2.640786434657414e-07, "loss": 0.3268, "step": 15977 }, { "epoch": 2.32, "grad_norm": 8.747684478759766, "learning_rate": 2.639714847690232e-07, "loss": 0.3607, "step": 15978 }, { "epoch": 2.32, "grad_norm": 8.227936744689941, "learning_rate": 2.6386434451230354e-07, "loss": 0.3675, "step": 15979 }, { "epoch": 2.32, "grad_norm": 9.615362167358398, "learning_rate": 2.637572226982666e-07, "loss": 0.335, "step": 15980 }, { "epoch": 2.32, "grad_norm": 8.945000648498535, "learning_rate": 2.6365011932959626e-07, "loss": 0.3899, "step": 15981 }, { "epoch": 2.32, "grad_norm": 7.829990386962891, "learning_rate": 2.6354303440897576e-07, "loss": 0.3263, "step": 15982 }, { "epoch": 2.32, "grad_norm": 8.964075088500977, "learning_rate": 2.6343596793908787e-07, "loss": 0.3807, "step": 15983 }, { "epoch": 2.32, "grad_norm": 8.123830795288086, "learning_rate": 2.63328919922615e-07, "loss": 0.2775, "step": 15984 }, { "epoch": 2.32, "grad_norm": 8.34665584564209, "learning_rate": 2.6322189036223906e-07, "loss": 0.3664, "step": 15985 }, { "epoch": 2.32, "grad_norm": 9.428467750549316, "learning_rate": 2.631148792606418e-07, "loss": 0.3785, "step": 15986 }, { "epoch": 2.32, "grad_norm": 8.123246192932129, "learning_rate": 2.6300788662050344e-07, "loss": 0.3036, "step": 15987 }, { "epoch": 2.32, "grad_norm": 7.8375372886657715, "learning_rate": 2.629009124445055e-07, "loss": 0.3551, "step": 15988 }, { "epoch": 2.32, "grad_norm": 8.602408409118652, "learning_rate": 2.62793956735327e-07, "loss": 0.3636, "step": 15989 }, { "epoch": 2.32, "grad_norm": 8.1799955368042, "learning_rate": 2.6268701949564865e-07, "loss": 0.3267, "step": 15990 }, { "epoch": 2.32, "grad_norm": 7.947980880737305, "learning_rate": 2.625801007281484e-07, "loss": 0.3235, "step": 15991 }, { "epoch": 2.32, "grad_norm": 9.357010841369629, "learning_rate": 2.6247320043550613e-07, "loss": 0.4272, "step": 15992 }, { "epoch": 2.32, "grad_norm": 8.288418769836426, "learning_rate": 2.6236631862039914e-07, "loss": 0.4018, "step": 15993 }, { "epoch": 2.32, "grad_norm": 8.398868560791016, "learning_rate": 2.6225945528550564e-07, "loss": 0.3409, "step": 15994 }, { "epoch": 2.32, "grad_norm": 7.896910190582275, "learning_rate": 2.6215261043350267e-07, "loss": 0.3172, "step": 15995 }, { "epoch": 2.32, "grad_norm": 7.132739543914795, "learning_rate": 2.620457840670672e-07, "loss": 0.2933, "step": 15996 }, { "epoch": 2.32, "grad_norm": 9.42094898223877, "learning_rate": 2.619389761888756e-07, "loss": 0.3808, "step": 15997 }, { "epoch": 2.32, "grad_norm": 9.43614387512207, "learning_rate": 2.6183218680160366e-07, "loss": 0.3869, "step": 15998 }, { "epoch": 2.32, "grad_norm": 8.149236679077148, "learning_rate": 2.61725415907927e-07, "loss": 0.3472, "step": 15999 }, { "epoch": 2.32, "grad_norm": 9.403757095336914, "learning_rate": 2.616186635105203e-07, "loss": 0.3852, "step": 16000 }, { "epoch": 2.32, "grad_norm": 7.036712646484375, "learning_rate": 2.6151192961205836e-07, "loss": 0.295, "step": 16001 }, { "epoch": 2.32, "grad_norm": 8.271872520446777, "learning_rate": 2.614052142152151e-07, "loss": 0.3572, "step": 16002 }, { "epoch": 2.32, "grad_norm": 8.495295524597168, "learning_rate": 2.612985173226641e-07, "loss": 0.355, "step": 16003 }, { "epoch": 2.32, "grad_norm": 9.124972343444824, "learning_rate": 2.611918389370784e-07, "loss": 0.3827, "step": 16004 }, { "epoch": 2.32, "grad_norm": 8.833581924438477, "learning_rate": 2.610851790611309e-07, "loss": 0.3885, "step": 16005 }, { "epoch": 2.32, "grad_norm": 9.00918197631836, "learning_rate": 2.6097853769749355e-07, "loss": 0.4135, "step": 16006 }, { "epoch": 2.32, "grad_norm": 8.16532039642334, "learning_rate": 2.6087191484883844e-07, "loss": 0.3719, "step": 16007 }, { "epoch": 2.32, "grad_norm": 8.377908706665039, "learning_rate": 2.6076531051783593e-07, "loss": 0.3357, "step": 16008 }, { "epoch": 2.32, "grad_norm": 8.541908264160156, "learning_rate": 2.60658724707158e-07, "loss": 0.3741, "step": 16009 }, { "epoch": 2.32, "grad_norm": 8.107412338256836, "learning_rate": 2.605521574194739e-07, "loss": 0.303, "step": 16010 }, { "epoch": 2.32, "grad_norm": 7.493878364562988, "learning_rate": 2.6044560865745457e-07, "loss": 0.2894, "step": 16011 }, { "epoch": 2.32, "grad_norm": 8.161969184875488, "learning_rate": 2.603390784237687e-07, "loss": 0.3171, "step": 16012 }, { "epoch": 2.32, "grad_norm": 7.778814792633057, "learning_rate": 2.602325667210853e-07, "loss": 0.3643, "step": 16013 }, { "epoch": 2.32, "grad_norm": 7.8629374504089355, "learning_rate": 2.601260735520728e-07, "loss": 0.3219, "step": 16014 }, { "epoch": 2.32, "grad_norm": 8.999445915222168, "learning_rate": 2.600195989193996e-07, "loss": 0.4272, "step": 16015 }, { "epoch": 2.32, "grad_norm": 9.861430168151855, "learning_rate": 2.5991314282573284e-07, "loss": 0.401, "step": 16016 }, { "epoch": 2.32, "grad_norm": 9.013866424560547, "learning_rate": 2.5980670527373983e-07, "loss": 0.3828, "step": 16017 }, { "epoch": 2.32, "grad_norm": 8.308453559875488, "learning_rate": 2.597002862660872e-07, "loss": 0.3593, "step": 16018 }, { "epoch": 2.32, "grad_norm": 8.28327465057373, "learning_rate": 2.5959388580544095e-07, "loss": 0.3316, "step": 16019 }, { "epoch": 2.32, "grad_norm": 8.133143424987793, "learning_rate": 2.5948750389446684e-07, "loss": 0.3539, "step": 16020 }, { "epoch": 2.32, "grad_norm": 10.683466911315918, "learning_rate": 2.593811405358305e-07, "loss": 0.4605, "step": 16021 }, { "epoch": 2.32, "grad_norm": 8.887673377990723, "learning_rate": 2.592747957321957e-07, "loss": 0.306, "step": 16022 }, { "epoch": 2.32, "grad_norm": 8.035407066345215, "learning_rate": 2.5916846948622784e-07, "loss": 0.3043, "step": 16023 }, { "epoch": 2.33, "grad_norm": 8.79555892944336, "learning_rate": 2.590621618005897e-07, "loss": 0.3458, "step": 16024 }, { "epoch": 2.33, "grad_norm": 8.341683387756348, "learning_rate": 2.5895587267794584e-07, "loss": 0.3243, "step": 16025 }, { "epoch": 2.33, "grad_norm": 7.690488338470459, "learning_rate": 2.5884960212095784e-07, "loss": 0.3099, "step": 16026 }, { "epoch": 2.33, "grad_norm": 8.417414665222168, "learning_rate": 2.5874335013228953e-07, "loss": 0.3204, "step": 16027 }, { "epoch": 2.33, "grad_norm": 8.885334968566895, "learning_rate": 2.5863711671460174e-07, "loss": 0.381, "step": 16028 }, { "epoch": 2.33, "grad_norm": 8.711478233337402, "learning_rate": 2.585309018705562e-07, "loss": 0.3887, "step": 16029 }, { "epoch": 2.33, "grad_norm": 9.173919677734375, "learning_rate": 2.584247056028147e-07, "loss": 0.3624, "step": 16030 }, { "epoch": 2.33, "grad_norm": 8.439701080322266, "learning_rate": 2.5831852791403675e-07, "loss": 0.3657, "step": 16031 }, { "epoch": 2.33, "grad_norm": 7.969398021697998, "learning_rate": 2.582123688068835e-07, "loss": 0.3764, "step": 16032 }, { "epoch": 2.33, "grad_norm": 9.873128890991211, "learning_rate": 2.5810622828401375e-07, "loss": 0.3332, "step": 16033 }, { "epoch": 2.33, "grad_norm": 8.520431518554688, "learning_rate": 2.58000106348087e-07, "loss": 0.3075, "step": 16034 }, { "epoch": 2.33, "grad_norm": 8.873578071594238, "learning_rate": 2.57894003001762e-07, "loss": 0.3718, "step": 16035 }, { "epoch": 2.33, "grad_norm": 9.352825164794922, "learning_rate": 2.5778791824769694e-07, "loss": 0.3951, "step": 16036 }, { "epoch": 2.33, "grad_norm": 8.591086387634277, "learning_rate": 2.5768185208854963e-07, "loss": 0.3548, "step": 16037 }, { "epoch": 2.33, "grad_norm": 9.480914115905762, "learning_rate": 2.5757580452697745e-07, "loss": 0.399, "step": 16038 }, { "epoch": 2.33, "grad_norm": 8.31070613861084, "learning_rate": 2.574697755656371e-07, "loss": 0.2811, "step": 16039 }, { "epoch": 2.33, "grad_norm": 8.650835990905762, "learning_rate": 2.573637652071851e-07, "loss": 0.3406, "step": 16040 }, { "epoch": 2.33, "grad_norm": 9.411311149597168, "learning_rate": 2.5725777345427715e-07, "loss": 0.4246, "step": 16041 }, { "epoch": 2.33, "grad_norm": 8.15899658203125, "learning_rate": 2.571518003095693e-07, "loss": 0.355, "step": 16042 }, { "epoch": 2.33, "grad_norm": 8.378600120544434, "learning_rate": 2.570458457757154e-07, "loss": 0.3209, "step": 16043 }, { "epoch": 2.33, "grad_norm": 9.540595054626465, "learning_rate": 2.569399098553714e-07, "loss": 0.3767, "step": 16044 }, { "epoch": 2.33, "grad_norm": 8.04355239868164, "learning_rate": 2.568339925511899e-07, "loss": 0.3662, "step": 16045 }, { "epoch": 2.33, "grad_norm": 7.89115571975708, "learning_rate": 2.567280938658258e-07, "loss": 0.3014, "step": 16046 }, { "epoch": 2.33, "grad_norm": 7.8944926261901855, "learning_rate": 2.5662221380193105e-07, "loss": 0.3479, "step": 16047 }, { "epoch": 2.33, "grad_norm": 8.255878448486328, "learning_rate": 2.565163523621594e-07, "loss": 0.3641, "step": 16048 }, { "epoch": 2.33, "grad_norm": 9.243215560913086, "learning_rate": 2.564105095491623e-07, "loss": 0.3585, "step": 16049 }, { "epoch": 2.33, "grad_norm": 7.594046592712402, "learning_rate": 2.563046853655917e-07, "loss": 0.3349, "step": 16050 }, { "epoch": 2.33, "grad_norm": 7.784483432769775, "learning_rate": 2.561988798140987e-07, "loss": 0.3302, "step": 16051 }, { "epoch": 2.33, "grad_norm": 7.480440139770508, "learning_rate": 2.560930928973344e-07, "loss": 0.2965, "step": 16052 }, { "epoch": 2.33, "grad_norm": 8.125565528869629, "learning_rate": 2.559873246179488e-07, "loss": 0.3294, "step": 16053 }, { "epoch": 2.33, "grad_norm": 8.510942459106445, "learning_rate": 2.5588157497859186e-07, "loss": 0.3138, "step": 16054 }, { "epoch": 2.33, "grad_norm": 9.975095748901367, "learning_rate": 2.557758439819131e-07, "loss": 0.3628, "step": 16055 }, { "epoch": 2.33, "grad_norm": 9.08370304107666, "learning_rate": 2.556701316305613e-07, "loss": 0.3243, "step": 16056 }, { "epoch": 2.33, "grad_norm": 8.124192237854004, "learning_rate": 2.5556443792718486e-07, "loss": 0.3295, "step": 16057 }, { "epoch": 2.33, "grad_norm": 7.186871528625488, "learning_rate": 2.554587628744319e-07, "loss": 0.3481, "step": 16058 }, { "epoch": 2.33, "grad_norm": 8.534300804138184, "learning_rate": 2.553531064749499e-07, "loss": 0.3741, "step": 16059 }, { "epoch": 2.33, "grad_norm": 8.503599166870117, "learning_rate": 2.5524746873138594e-07, "loss": 0.3195, "step": 16060 }, { "epoch": 2.33, "grad_norm": 8.61159896850586, "learning_rate": 2.5514184964638654e-07, "loss": 0.378, "step": 16061 }, { "epoch": 2.33, "grad_norm": 7.606055736541748, "learning_rate": 2.550362492225978e-07, "loss": 0.3469, "step": 16062 }, { "epoch": 2.33, "grad_norm": 8.166574478149414, "learning_rate": 2.5493066746266565e-07, "loss": 0.3313, "step": 16063 }, { "epoch": 2.33, "grad_norm": 8.965673446655273, "learning_rate": 2.548251043692344e-07, "loss": 0.3157, "step": 16064 }, { "epoch": 2.33, "grad_norm": 8.60239315032959, "learning_rate": 2.547195599449501e-07, "loss": 0.3565, "step": 16065 }, { "epoch": 2.33, "grad_norm": 8.358476638793945, "learning_rate": 2.546140341924556e-07, "loss": 0.341, "step": 16066 }, { "epoch": 2.33, "grad_norm": 8.89724349975586, "learning_rate": 2.5450852711439595e-07, "loss": 0.2741, "step": 16067 }, { "epoch": 2.33, "grad_norm": 9.368985176086426, "learning_rate": 2.544030387134136e-07, "loss": 0.3933, "step": 16068 }, { "epoch": 2.33, "grad_norm": 9.178403854370117, "learning_rate": 2.542975689921516e-07, "loss": 0.3288, "step": 16069 }, { "epoch": 2.33, "grad_norm": 8.659625053405762, "learning_rate": 2.5419211795325234e-07, "loss": 0.3469, "step": 16070 }, { "epoch": 2.33, "grad_norm": 9.528485298156738, "learning_rate": 2.540866855993579e-07, "loss": 0.3352, "step": 16071 }, { "epoch": 2.33, "grad_norm": 7.923194885253906, "learning_rate": 2.5398127193310934e-07, "loss": 0.3348, "step": 16072 }, { "epoch": 2.33, "grad_norm": 9.65129280090332, "learning_rate": 2.538758769571481e-07, "loss": 0.3905, "step": 16073 }, { "epoch": 2.33, "grad_norm": 10.7264404296875, "learning_rate": 2.5377050067411434e-07, "loss": 0.3707, "step": 16074 }, { "epoch": 2.33, "grad_norm": 8.44283390045166, "learning_rate": 2.536651430866481e-07, "loss": 0.3369, "step": 16075 }, { "epoch": 2.33, "grad_norm": 8.067728042602539, "learning_rate": 2.535598041973891e-07, "loss": 0.3602, "step": 16076 }, { "epoch": 2.33, "grad_norm": 7.897017002105713, "learning_rate": 2.5345448400897675e-07, "loss": 0.3105, "step": 16077 }, { "epoch": 2.33, "grad_norm": 9.741146087646484, "learning_rate": 2.5334918252404856e-07, "loss": 0.4463, "step": 16078 }, { "epoch": 2.33, "grad_norm": 7.764326572418213, "learning_rate": 2.5324389974524406e-07, "loss": 0.3515, "step": 16079 }, { "epoch": 2.33, "grad_norm": 8.120445251464844, "learning_rate": 2.531386356751998e-07, "loss": 0.3138, "step": 16080 }, { "epoch": 2.33, "grad_norm": 7.58942985534668, "learning_rate": 2.530333903165537e-07, "loss": 0.3273, "step": 16081 }, { "epoch": 2.33, "grad_norm": 10.024497032165527, "learning_rate": 2.529281636719424e-07, "loss": 0.4015, "step": 16082 }, { "epoch": 2.33, "grad_norm": 8.570649147033691, "learning_rate": 2.528229557440019e-07, "loss": 0.3914, "step": 16083 }, { "epoch": 2.33, "grad_norm": 8.73789119720459, "learning_rate": 2.527177665353687e-07, "loss": 0.3495, "step": 16084 }, { "epoch": 2.33, "grad_norm": 9.16832160949707, "learning_rate": 2.52612596048677e-07, "loss": 0.3492, "step": 16085 }, { "epoch": 2.33, "grad_norm": 8.628371238708496, "learning_rate": 2.525074442865629e-07, "loss": 0.3593, "step": 16086 }, { "epoch": 2.33, "grad_norm": 9.509077072143555, "learning_rate": 2.5240231125165966e-07, "loss": 0.3711, "step": 16087 }, { "epoch": 2.33, "grad_norm": 9.222774505615234, "learning_rate": 2.5229719694660246e-07, "loss": 0.3749, "step": 16088 }, { "epoch": 2.33, "grad_norm": 9.859130859375, "learning_rate": 2.521921013740237e-07, "loss": 0.378, "step": 16089 }, { "epoch": 2.33, "grad_norm": 8.374462127685547, "learning_rate": 2.520870245365568e-07, "loss": 0.3508, "step": 16090 }, { "epoch": 2.33, "grad_norm": 8.081926345825195, "learning_rate": 2.519819664368343e-07, "loss": 0.3721, "step": 16091 }, { "epoch": 2.33, "grad_norm": 8.94868278503418, "learning_rate": 2.518769270774882e-07, "loss": 0.3041, "step": 16092 }, { "epoch": 2.34, "grad_norm": 7.467329978942871, "learning_rate": 2.517719064611501e-07, "loss": 0.3261, "step": 16093 }, { "epoch": 2.34, "grad_norm": 9.387564659118652, "learning_rate": 2.5166690459045123e-07, "loss": 0.3562, "step": 16094 }, { "epoch": 2.34, "grad_norm": 8.97826099395752, "learning_rate": 2.51561921468022e-07, "loss": 0.3929, "step": 16095 }, { "epoch": 2.34, "grad_norm": 8.08910846710205, "learning_rate": 2.5145695709649296e-07, "loss": 0.3599, "step": 16096 }, { "epoch": 2.34, "grad_norm": 9.584613800048828, "learning_rate": 2.513520114784934e-07, "loss": 0.3365, "step": 16097 }, { "epoch": 2.34, "grad_norm": 9.114736557006836, "learning_rate": 2.512470846166532e-07, "loss": 0.3759, "step": 16098 }, { "epoch": 2.34, "grad_norm": 8.417801856994629, "learning_rate": 2.5114217651360003e-07, "loss": 0.3011, "step": 16099 }, { "epoch": 2.34, "grad_norm": 7.768744468688965, "learning_rate": 2.5103728717196347e-07, "loss": 0.3204, "step": 16100 }, { "epoch": 2.34, "grad_norm": 9.842734336853027, "learning_rate": 2.509324165943701e-07, "loss": 0.3283, "step": 16101 }, { "epoch": 2.34, "grad_norm": 8.93189811706543, "learning_rate": 2.508275647834487e-07, "loss": 0.3362, "step": 16102 }, { "epoch": 2.34, "grad_norm": 7.142301559448242, "learning_rate": 2.507227317418247e-07, "loss": 0.3074, "step": 16103 }, { "epoch": 2.34, "grad_norm": 7.6728386878967285, "learning_rate": 2.506179174721257e-07, "loss": 0.3392, "step": 16104 }, { "epoch": 2.34, "grad_norm": 9.043550491333008, "learning_rate": 2.5051312197697694e-07, "loss": 0.3624, "step": 16105 }, { "epoch": 2.34, "grad_norm": 8.91425609588623, "learning_rate": 2.504083452590038e-07, "loss": 0.3756, "step": 16106 }, { "epoch": 2.34, "grad_norm": 8.519577980041504, "learning_rate": 2.5030358732083226e-07, "loss": 0.3475, "step": 16107 }, { "epoch": 2.34, "grad_norm": 7.761226177215576, "learning_rate": 2.501988481650855e-07, "loss": 0.3035, "step": 16108 }, { "epoch": 2.34, "grad_norm": 9.727789878845215, "learning_rate": 2.500941277943891e-07, "loss": 0.3724, "step": 16109 }, { "epoch": 2.34, "grad_norm": 8.168112754821777, "learning_rate": 2.4998942621136544e-07, "loss": 0.3311, "step": 16110 }, { "epoch": 2.34, "grad_norm": 9.062769889831543, "learning_rate": 2.4988474341863795e-07, "loss": 0.315, "step": 16111 }, { "epoch": 2.34, "grad_norm": 9.725441932678223, "learning_rate": 2.497800794188296e-07, "loss": 0.4479, "step": 16112 }, { "epoch": 2.34, "grad_norm": 7.131636619567871, "learning_rate": 2.496754342145623e-07, "loss": 0.3558, "step": 16113 }, { "epoch": 2.34, "grad_norm": 7.831178665161133, "learning_rate": 2.4957080780845784e-07, "loss": 0.2899, "step": 16114 }, { "epoch": 2.34, "grad_norm": 8.714079856872559, "learning_rate": 2.4946620020313746e-07, "loss": 0.3784, "step": 16115 }, { "epoch": 2.34, "grad_norm": 8.712482452392578, "learning_rate": 2.49361611401222e-07, "loss": 0.3823, "step": 16116 }, { "epoch": 2.34, "grad_norm": 9.399116516113281, "learning_rate": 2.4925704140533164e-07, "loss": 0.3498, "step": 16117 }, { "epoch": 2.34, "grad_norm": 8.636265754699707, "learning_rate": 2.491524902180864e-07, "loss": 0.3674, "step": 16118 }, { "epoch": 2.34, "grad_norm": 8.463434219360352, "learning_rate": 2.490479578421056e-07, "loss": 0.3113, "step": 16119 }, { "epoch": 2.34, "grad_norm": 7.3582048416137695, "learning_rate": 2.489434442800077e-07, "loss": 0.3299, "step": 16120 }, { "epoch": 2.34, "grad_norm": 9.159087181091309, "learning_rate": 2.488389495344119e-07, "loss": 0.3881, "step": 16121 }, { "epoch": 2.34, "grad_norm": 7.832568168640137, "learning_rate": 2.487344736079351e-07, "loss": 0.3424, "step": 16122 }, { "epoch": 2.34, "grad_norm": 7.891354560852051, "learning_rate": 2.4863001650319604e-07, "loss": 0.2691, "step": 16123 }, { "epoch": 2.34, "grad_norm": 8.740900993347168, "learning_rate": 2.485255782228108e-07, "loss": 0.3325, "step": 16124 }, { "epoch": 2.34, "grad_norm": 7.796001434326172, "learning_rate": 2.484211587693962e-07, "loss": 0.3238, "step": 16125 }, { "epoch": 2.34, "grad_norm": 9.690834045410156, "learning_rate": 2.483167581455682e-07, "loss": 0.4017, "step": 16126 }, { "epoch": 2.34, "grad_norm": 8.038509368896484, "learning_rate": 2.482123763539424e-07, "loss": 0.3593, "step": 16127 }, { "epoch": 2.34, "grad_norm": 7.734050750732422, "learning_rate": 2.4810801339713416e-07, "loss": 0.3666, "step": 16128 }, { "epoch": 2.34, "grad_norm": 10.066234588623047, "learning_rate": 2.4800366927775785e-07, "loss": 0.4327, "step": 16129 }, { "epoch": 2.34, "grad_norm": 8.592656135559082, "learning_rate": 2.478993439984277e-07, "loss": 0.3436, "step": 16130 }, { "epoch": 2.34, "grad_norm": 8.819449424743652, "learning_rate": 2.477950375617576e-07, "loss": 0.405, "step": 16131 }, { "epoch": 2.34, "grad_norm": 7.703714370727539, "learning_rate": 2.4769074997036055e-07, "loss": 0.2833, "step": 16132 }, { "epoch": 2.34, "grad_norm": 8.9802885055542, "learning_rate": 2.4758648122684935e-07, "loss": 0.3971, "step": 16133 }, { "epoch": 2.34, "grad_norm": 9.828964233398438, "learning_rate": 2.4748223133383626e-07, "loss": 0.3704, "step": 16134 }, { "epoch": 2.34, "grad_norm": 8.209433555603027, "learning_rate": 2.473780002939333e-07, "loss": 0.3758, "step": 16135 }, { "epoch": 2.34, "grad_norm": 9.319793701171875, "learning_rate": 2.4727378810975164e-07, "loss": 0.3924, "step": 16136 }, { "epoch": 2.34, "grad_norm": 7.933793544769287, "learning_rate": 2.4716959478390206e-07, "loss": 0.3127, "step": 16137 }, { "epoch": 2.34, "grad_norm": 8.013160705566406, "learning_rate": 2.470654203189951e-07, "loss": 0.3104, "step": 16138 }, { "epoch": 2.34, "grad_norm": 10.451111793518066, "learning_rate": 2.469612647176407e-07, "loss": 0.3924, "step": 16139 }, { "epoch": 2.34, "grad_norm": 9.451334953308105, "learning_rate": 2.468571279824485e-07, "loss": 0.3523, "step": 16140 }, { "epoch": 2.34, "grad_norm": 8.975983619689941, "learning_rate": 2.4675301011602665e-07, "loss": 0.4063, "step": 16141 }, { "epoch": 2.34, "grad_norm": 7.903792381286621, "learning_rate": 2.4664891112098486e-07, "loss": 0.3518, "step": 16142 }, { "epoch": 2.34, "grad_norm": 9.904667854309082, "learning_rate": 2.4654483099992995e-07, "loss": 0.3794, "step": 16143 }, { "epoch": 2.34, "grad_norm": 8.282546043395996, "learning_rate": 2.4644076975547055e-07, "loss": 0.3872, "step": 16144 }, { "epoch": 2.34, "grad_norm": 8.241223335266113, "learning_rate": 2.4633672739021305e-07, "loss": 0.3182, "step": 16145 }, { "epoch": 2.34, "grad_norm": 7.9877543449401855, "learning_rate": 2.462327039067643e-07, "loss": 0.35, "step": 16146 }, { "epoch": 2.34, "grad_norm": 8.277637481689453, "learning_rate": 2.461286993077304e-07, "loss": 0.332, "step": 16147 }, { "epoch": 2.34, "grad_norm": 8.578984260559082, "learning_rate": 2.46024713595717e-07, "loss": 0.3602, "step": 16148 }, { "epoch": 2.34, "grad_norm": 8.464502334594727, "learning_rate": 2.459207467733292e-07, "loss": 0.3829, "step": 16149 }, { "epoch": 2.34, "grad_norm": 7.87591028213501, "learning_rate": 2.4581679884317195e-07, "loss": 0.2893, "step": 16150 }, { "epoch": 2.34, "grad_norm": 8.622206687927246, "learning_rate": 2.4571286980784944e-07, "loss": 0.3238, "step": 16151 }, { "epoch": 2.34, "grad_norm": 8.766623497009277, "learning_rate": 2.456089596699653e-07, "loss": 0.3994, "step": 16152 }, { "epoch": 2.34, "grad_norm": 9.127066612243652, "learning_rate": 2.4550506843212295e-07, "loss": 0.4382, "step": 16153 }, { "epoch": 2.34, "grad_norm": 7.447131633758545, "learning_rate": 2.4540119609692557e-07, "loss": 0.2984, "step": 16154 }, { "epoch": 2.34, "grad_norm": 7.331572532653809, "learning_rate": 2.4529734266697444e-07, "loss": 0.2857, "step": 16155 }, { "epoch": 2.34, "grad_norm": 7.620121479034424, "learning_rate": 2.451935081448727e-07, "loss": 0.3408, "step": 16156 }, { "epoch": 2.34, "grad_norm": 9.335214614868164, "learning_rate": 2.4508969253322066e-07, "loss": 0.3977, "step": 16157 }, { "epoch": 2.34, "grad_norm": 7.021379470825195, "learning_rate": 2.449858958346199e-07, "loss": 0.3377, "step": 16158 }, { "epoch": 2.34, "grad_norm": 9.34508991241455, "learning_rate": 2.4488211805167125e-07, "loss": 0.3652, "step": 16159 }, { "epoch": 2.34, "grad_norm": 7.516880035400391, "learning_rate": 2.4477835918697355e-07, "loss": 0.3531, "step": 16160 }, { "epoch": 2.34, "grad_norm": 10.1091890335083, "learning_rate": 2.4467461924312747e-07, "loss": 0.4221, "step": 16161 }, { "epoch": 2.35, "grad_norm": 8.511101722717285, "learning_rate": 2.44570898222731e-07, "loss": 0.36, "step": 16162 }, { "epoch": 2.35, "grad_norm": 10.094120025634766, "learning_rate": 2.4446719612838384e-07, "loss": 0.4185, "step": 16163 }, { "epoch": 2.35, "grad_norm": 8.656222343444824, "learning_rate": 2.443635129626829e-07, "loss": 0.3723, "step": 16164 }, { "epoch": 2.35, "grad_norm": 9.16590404510498, "learning_rate": 2.442598487282269e-07, "loss": 0.442, "step": 16165 }, { "epoch": 2.35, "grad_norm": 7.290533065795898, "learning_rate": 2.441562034276121e-07, "loss": 0.3319, "step": 16166 }, { "epoch": 2.35, "grad_norm": 8.61507797241211, "learning_rate": 2.440525770634356e-07, "loss": 0.3699, "step": 16167 }, { "epoch": 2.35, "grad_norm": 8.594552993774414, "learning_rate": 2.4394896963829346e-07, "loss": 0.3165, "step": 16168 }, { "epoch": 2.35, "grad_norm": 9.117097854614258, "learning_rate": 2.438453811547814e-07, "loss": 0.4, "step": 16169 }, { "epoch": 2.35, "grad_norm": 8.234382629394531, "learning_rate": 2.437418116154948e-07, "loss": 0.3549, "step": 16170 }, { "epoch": 2.35, "grad_norm": 8.940861701965332, "learning_rate": 2.436382610230282e-07, "loss": 0.3639, "step": 16171 }, { "epoch": 2.35, "grad_norm": 8.474041938781738, "learning_rate": 2.43534729379976e-07, "loss": 0.3835, "step": 16172 }, { "epoch": 2.35, "grad_norm": 9.157983779907227, "learning_rate": 2.434312166889321e-07, "loss": 0.3827, "step": 16173 }, { "epoch": 2.35, "grad_norm": 9.162495613098145, "learning_rate": 2.433277229524898e-07, "loss": 0.3398, "step": 16174 }, { "epoch": 2.35, "grad_norm": 7.8986005783081055, "learning_rate": 2.4322424817324215e-07, "loss": 0.3483, "step": 16175 }, { "epoch": 2.35, "grad_norm": 8.854716300964355, "learning_rate": 2.431207923537808e-07, "loss": 0.3695, "step": 16176 }, { "epoch": 2.35, "grad_norm": 10.318467140197754, "learning_rate": 2.430173554966988e-07, "loss": 0.3657, "step": 16177 }, { "epoch": 2.35, "grad_norm": 9.167076110839844, "learning_rate": 2.429139376045863e-07, "loss": 0.4218, "step": 16178 }, { "epoch": 2.35, "grad_norm": 8.883233070373535, "learning_rate": 2.4281053868003567e-07, "loss": 0.3833, "step": 16179 }, { "epoch": 2.35, "grad_norm": 9.503074645996094, "learning_rate": 2.427071587256363e-07, "loss": 0.3534, "step": 16180 }, { "epoch": 2.35, "grad_norm": 8.893843650817871, "learning_rate": 2.4260379774397866e-07, "loss": 0.345, "step": 16181 }, { "epoch": 2.35, "grad_norm": 8.325326919555664, "learning_rate": 2.425004557376522e-07, "loss": 0.3296, "step": 16182 }, { "epoch": 2.35, "grad_norm": 9.080368995666504, "learning_rate": 2.42397132709246e-07, "loss": 0.3751, "step": 16183 }, { "epoch": 2.35, "grad_norm": 9.261075973510742, "learning_rate": 2.422938286613486e-07, "loss": 0.304, "step": 16184 }, { "epoch": 2.35, "grad_norm": 9.157215118408203, "learning_rate": 2.42190543596548e-07, "loss": 0.3639, "step": 16185 }, { "epoch": 2.35, "grad_norm": 7.731720924377441, "learning_rate": 2.420872775174326e-07, "loss": 0.3214, "step": 16186 }, { "epoch": 2.35, "grad_norm": 9.249384880065918, "learning_rate": 2.4198403042658867e-07, "loss": 0.2895, "step": 16187 }, { "epoch": 2.35, "grad_norm": 9.008652687072754, "learning_rate": 2.4188080232660324e-07, "loss": 0.3683, "step": 16188 }, { "epoch": 2.35, "grad_norm": 7.323349475860596, "learning_rate": 2.4177759322006243e-07, "loss": 0.3578, "step": 16189 }, { "epoch": 2.35, "grad_norm": 7.239653587341309, "learning_rate": 2.416744031095521e-07, "loss": 0.2663, "step": 16190 }, { "epoch": 2.35, "grad_norm": 8.060861587524414, "learning_rate": 2.415712319976574e-07, "loss": 0.3057, "step": 16191 }, { "epoch": 2.35, "grad_norm": 7.428914546966553, "learning_rate": 2.4146807988696316e-07, "loss": 0.3298, "step": 16192 }, { "epoch": 2.35, "grad_norm": 8.802712440490723, "learning_rate": 2.413649467800537e-07, "loss": 0.3945, "step": 16193 }, { "epoch": 2.35, "grad_norm": 8.343907356262207, "learning_rate": 2.412618326795129e-07, "loss": 0.3639, "step": 16194 }, { "epoch": 2.35, "grad_norm": 9.445812225341797, "learning_rate": 2.4115873758792403e-07, "loss": 0.3813, "step": 16195 }, { "epoch": 2.35, "grad_norm": 7.717037677764893, "learning_rate": 2.4105566150787026e-07, "loss": 0.3599, "step": 16196 }, { "epoch": 2.35, "grad_norm": 8.188543319702148, "learning_rate": 2.409526044419332e-07, "loss": 0.309, "step": 16197 }, { "epoch": 2.35, "grad_norm": 8.176637649536133, "learning_rate": 2.408495663926959e-07, "loss": 0.3371, "step": 16198 }, { "epoch": 2.35, "grad_norm": 8.976007461547852, "learning_rate": 2.407465473627386e-07, "loss": 0.4457, "step": 16199 }, { "epoch": 2.35, "grad_norm": 8.101075172424316, "learning_rate": 2.4064354735464355e-07, "loss": 0.3323, "step": 16200 }, { "epoch": 2.35, "grad_norm": 7.731698989868164, "learning_rate": 2.4054056637099016e-07, "loss": 0.3025, "step": 16201 }, { "epoch": 2.35, "grad_norm": 9.528398513793945, "learning_rate": 2.4043760441435903e-07, "loss": 0.3513, "step": 16202 }, { "epoch": 2.35, "grad_norm": 8.94466495513916, "learning_rate": 2.403346614873295e-07, "loss": 0.3737, "step": 16203 }, { "epoch": 2.35, "grad_norm": 8.041837692260742, "learning_rate": 2.4023173759248073e-07, "loss": 0.3642, "step": 16204 }, { "epoch": 2.35, "grad_norm": 9.197651863098145, "learning_rate": 2.4012883273239124e-07, "loss": 0.397, "step": 16205 }, { "epoch": 2.35, "grad_norm": 8.762689590454102, "learning_rate": 2.400259469096393e-07, "loss": 0.3701, "step": 16206 }, { "epoch": 2.35, "grad_norm": 8.60225772857666, "learning_rate": 2.399230801268023e-07, "loss": 0.3544, "step": 16207 }, { "epoch": 2.35, "grad_norm": 8.691814422607422, "learning_rate": 2.3982023238645755e-07, "loss": 0.3929, "step": 16208 }, { "epoch": 2.35, "grad_norm": 8.270129203796387, "learning_rate": 2.397174036911818e-07, "loss": 0.3587, "step": 16209 }, { "epoch": 2.35, "grad_norm": 8.030526161193848, "learning_rate": 2.396145940435514e-07, "loss": 0.3414, "step": 16210 }, { "epoch": 2.35, "grad_norm": 8.524703979492188, "learning_rate": 2.395118034461413e-07, "loss": 0.3059, "step": 16211 }, { "epoch": 2.35, "grad_norm": 8.519634246826172, "learning_rate": 2.394090319015275e-07, "loss": 0.2967, "step": 16212 }, { "epoch": 2.35, "grad_norm": 8.107644081115723, "learning_rate": 2.3930627941228465e-07, "loss": 0.3235, "step": 16213 }, { "epoch": 2.35, "grad_norm": 7.567030906677246, "learning_rate": 2.3920354598098705e-07, "loss": 0.3818, "step": 16214 }, { "epoch": 2.35, "grad_norm": 9.439570426940918, "learning_rate": 2.3910083161020853e-07, "loss": 0.4345, "step": 16215 }, { "epoch": 2.35, "grad_norm": 9.266677856445312, "learning_rate": 2.3899813630252197e-07, "loss": 0.403, "step": 16216 }, { "epoch": 2.35, "grad_norm": 8.916122436523438, "learning_rate": 2.3889546006050107e-07, "loss": 0.3817, "step": 16217 }, { "epoch": 2.35, "grad_norm": 8.258902549743652, "learning_rate": 2.387928028867172e-07, "loss": 0.3522, "step": 16218 }, { "epoch": 2.35, "grad_norm": 9.32341194152832, "learning_rate": 2.386901647837434e-07, "loss": 0.3617, "step": 16219 }, { "epoch": 2.35, "grad_norm": 8.301990509033203, "learning_rate": 2.3858754575414997e-07, "loss": 0.3715, "step": 16220 }, { "epoch": 2.35, "grad_norm": 7.393856525421143, "learning_rate": 2.384849458005089e-07, "loss": 0.3894, "step": 16221 }, { "epoch": 2.35, "grad_norm": 7.571615695953369, "learning_rate": 2.3838236492539e-07, "loss": 0.3385, "step": 16222 }, { "epoch": 2.35, "grad_norm": 7.832597255706787, "learning_rate": 2.3827980313136342e-07, "loss": 0.3082, "step": 16223 }, { "epoch": 2.35, "grad_norm": 8.408238410949707, "learning_rate": 2.3817726042099871e-07, "loss": 0.318, "step": 16224 }, { "epoch": 2.35, "grad_norm": 8.018640518188477, "learning_rate": 2.3807473679686484e-07, "loss": 0.3686, "step": 16225 }, { "epoch": 2.35, "grad_norm": 8.665196418762207, "learning_rate": 2.379722322615305e-07, "loss": 0.3142, "step": 16226 }, { "epoch": 2.35, "grad_norm": 9.340848922729492, "learning_rate": 2.3786974681756377e-07, "loss": 0.3657, "step": 16227 }, { "epoch": 2.35, "grad_norm": 9.3535795211792, "learning_rate": 2.3776728046753214e-07, "loss": 0.4252, "step": 16228 }, { "epoch": 2.35, "grad_norm": 8.502679824829102, "learning_rate": 2.3766483321400278e-07, "loss": 0.3902, "step": 16229 }, { "epoch": 2.35, "grad_norm": 7.544084072113037, "learning_rate": 2.375624050595424e-07, "loss": 0.3491, "step": 16230 }, { "epoch": 2.36, "grad_norm": 9.065486907958984, "learning_rate": 2.374599960067174e-07, "loss": 0.3955, "step": 16231 }, { "epoch": 2.36, "grad_norm": 8.542752265930176, "learning_rate": 2.3735760605809262e-07, "loss": 0.3419, "step": 16232 }, { "epoch": 2.36, "grad_norm": 10.828523635864258, "learning_rate": 2.3725523521623435e-07, "loss": 0.431, "step": 16233 }, { "epoch": 2.36, "grad_norm": 7.903574466705322, "learning_rate": 2.3715288348370643e-07, "loss": 0.344, "step": 16234 }, { "epoch": 2.36, "grad_norm": 7.700740337371826, "learning_rate": 2.37050550863074e-07, "loss": 0.3112, "step": 16235 }, { "epoch": 2.36, "grad_norm": 8.332722663879395, "learning_rate": 2.3694823735690006e-07, "loss": 0.3485, "step": 16236 }, { "epoch": 2.36, "grad_norm": 8.230781555175781, "learning_rate": 2.3684594296774796e-07, "loss": 0.3369, "step": 16237 }, { "epoch": 2.36, "grad_norm": 8.645203590393066, "learning_rate": 2.3674366769818133e-07, "loss": 0.3688, "step": 16238 }, { "epoch": 2.36, "grad_norm": 9.44372844696045, "learning_rate": 2.3664141155076133e-07, "loss": 0.4503, "step": 16239 }, { "epoch": 2.36, "grad_norm": 8.687119483947754, "learning_rate": 2.3653917452805105e-07, "loss": 0.3999, "step": 16240 }, { "epoch": 2.36, "grad_norm": 7.965549468994141, "learning_rate": 2.36436956632611e-07, "loss": 0.3479, "step": 16241 }, { "epoch": 2.36, "grad_norm": 8.02540111541748, "learning_rate": 2.363347578670022e-07, "loss": 0.3541, "step": 16242 }, { "epoch": 2.36, "grad_norm": 7.712911605834961, "learning_rate": 2.362325782337854e-07, "loss": 0.3681, "step": 16243 }, { "epoch": 2.36, "grad_norm": 7.9367499351501465, "learning_rate": 2.3613041773552023e-07, "loss": 0.3486, "step": 16244 }, { "epoch": 2.36, "grad_norm": 7.943062782287598, "learning_rate": 2.360282763747663e-07, "loss": 0.3165, "step": 16245 }, { "epoch": 2.36, "grad_norm": 8.00062084197998, "learning_rate": 2.3592615415408267e-07, "loss": 0.385, "step": 16246 }, { "epoch": 2.36, "grad_norm": 7.438684940338135, "learning_rate": 2.3582405107602765e-07, "loss": 0.3098, "step": 16247 }, { "epoch": 2.36, "grad_norm": 8.042339324951172, "learning_rate": 2.357219671431594e-07, "loss": 0.3498, "step": 16248 }, { "epoch": 2.36, "grad_norm": 9.16762924194336, "learning_rate": 2.3561990235803554e-07, "loss": 0.3816, "step": 16249 }, { "epoch": 2.36, "grad_norm": 9.574152946472168, "learning_rate": 2.3551785672321323e-07, "loss": 0.4035, "step": 16250 }, { "epoch": 2.36, "grad_norm": 9.405489921569824, "learning_rate": 2.3541583024124834e-07, "loss": 0.3386, "step": 16251 }, { "epoch": 2.36, "grad_norm": 8.095927238464355, "learning_rate": 2.3531382291469804e-07, "loss": 0.2748, "step": 16252 }, { "epoch": 2.36, "grad_norm": 8.304340362548828, "learning_rate": 2.3521183474611684e-07, "loss": 0.3338, "step": 16253 }, { "epoch": 2.36, "grad_norm": 8.951973915100098, "learning_rate": 2.3510986573806113e-07, "loss": 0.3623, "step": 16254 }, { "epoch": 2.36, "grad_norm": 8.02845287322998, "learning_rate": 2.3500791589308421e-07, "loss": 0.3673, "step": 16255 }, { "epoch": 2.36, "grad_norm": 9.632058143615723, "learning_rate": 2.3490598521374162e-07, "loss": 0.3858, "step": 16256 }, { "epoch": 2.36, "grad_norm": 9.318329811096191, "learning_rate": 2.3480407370258616e-07, "loss": 0.3827, "step": 16257 }, { "epoch": 2.36, "grad_norm": 8.492437362670898, "learning_rate": 2.3470218136217136e-07, "loss": 0.3775, "step": 16258 }, { "epoch": 2.36, "grad_norm": 8.245678901672363, "learning_rate": 2.3460030819504983e-07, "loss": 0.3187, "step": 16259 }, { "epoch": 2.36, "grad_norm": 8.338566780090332, "learning_rate": 2.3449845420377402e-07, "loss": 0.3491, "step": 16260 }, { "epoch": 2.36, "grad_norm": 9.152985572814941, "learning_rate": 2.343966193908955e-07, "loss": 0.4458, "step": 16261 }, { "epoch": 2.36, "grad_norm": 7.819471836090088, "learning_rate": 2.3429480375896581e-07, "loss": 0.3224, "step": 16262 }, { "epoch": 2.36, "grad_norm": 8.917250633239746, "learning_rate": 2.341930073105357e-07, "loss": 0.3905, "step": 16263 }, { "epoch": 2.36, "grad_norm": 8.597270011901855, "learning_rate": 2.3409123004815535e-07, "loss": 0.3474, "step": 16264 }, { "epoch": 2.36, "grad_norm": 8.731708526611328, "learning_rate": 2.339894719743749e-07, "loss": 0.3499, "step": 16265 }, { "epoch": 2.36, "grad_norm": 8.679296493530273, "learning_rate": 2.3388773309174348e-07, "loss": 0.3835, "step": 16266 }, { "epoch": 2.36, "grad_norm": 8.909443855285645, "learning_rate": 2.3378601340281024e-07, "loss": 0.3528, "step": 16267 }, { "epoch": 2.36, "grad_norm": 7.636847019195557, "learning_rate": 2.3368431291012336e-07, "loss": 0.3386, "step": 16268 }, { "epoch": 2.36, "grad_norm": 9.246984481811523, "learning_rate": 2.3358263161623094e-07, "loss": 0.3632, "step": 16269 }, { "epoch": 2.36, "grad_norm": 7.9905219078063965, "learning_rate": 2.3348096952368036e-07, "loss": 0.319, "step": 16270 }, { "epoch": 2.36, "grad_norm": 8.691762924194336, "learning_rate": 2.333793266350189e-07, "loss": 0.3502, "step": 16271 }, { "epoch": 2.36, "grad_norm": 8.170917510986328, "learning_rate": 2.3327770295279225e-07, "loss": 0.3506, "step": 16272 }, { "epoch": 2.36, "grad_norm": 10.20965576171875, "learning_rate": 2.3317609847954755e-07, "loss": 0.3647, "step": 16273 }, { "epoch": 2.36, "grad_norm": 9.153525352478027, "learning_rate": 2.3307451321782913e-07, "loss": 0.3986, "step": 16274 }, { "epoch": 2.36, "grad_norm": 9.153922080993652, "learning_rate": 2.3297294717018324e-07, "loss": 0.3056, "step": 16275 }, { "epoch": 2.36, "grad_norm": 9.492732048034668, "learning_rate": 2.328714003391533e-07, "loss": 0.4117, "step": 16276 }, { "epoch": 2.36, "grad_norm": 7.573581695556641, "learning_rate": 2.3276987272728454e-07, "loss": 0.2988, "step": 16277 }, { "epoch": 2.36, "grad_norm": 9.900087356567383, "learning_rate": 2.3266836433711967e-07, "loss": 0.3743, "step": 16278 }, { "epoch": 2.36, "grad_norm": 9.002121925354004, "learning_rate": 2.325668751712022e-07, "loss": 0.3571, "step": 16279 }, { "epoch": 2.36, "grad_norm": 8.6574125289917, "learning_rate": 2.324654052320747e-07, "loss": 0.3616, "step": 16280 }, { "epoch": 2.36, "grad_norm": 7.973369598388672, "learning_rate": 2.3236395452227942e-07, "loss": 0.2836, "step": 16281 }, { "epoch": 2.36, "grad_norm": 8.342321395874023, "learning_rate": 2.3226252304435788e-07, "loss": 0.3627, "step": 16282 }, { "epoch": 2.36, "grad_norm": 9.047561645507812, "learning_rate": 2.3216111080085153e-07, "loss": 0.3535, "step": 16283 }, { "epoch": 2.36, "grad_norm": 9.494735717773438, "learning_rate": 2.3205971779430079e-07, "loss": 0.434, "step": 16284 }, { "epoch": 2.36, "grad_norm": 7.40593147277832, "learning_rate": 2.3195834402724613e-07, "loss": 0.3222, "step": 16285 }, { "epoch": 2.36, "grad_norm": 9.192459106445312, "learning_rate": 2.3185698950222722e-07, "loss": 0.3988, "step": 16286 }, { "epoch": 2.36, "grad_norm": 8.122795104980469, "learning_rate": 2.3175565422178368e-07, "loss": 0.35, "step": 16287 }, { "epoch": 2.36, "grad_norm": 8.491720199584961, "learning_rate": 2.3165433818845326e-07, "loss": 0.3694, "step": 16288 }, { "epoch": 2.36, "grad_norm": 9.548569679260254, "learning_rate": 2.3155304140477529e-07, "loss": 0.3648, "step": 16289 }, { "epoch": 2.36, "grad_norm": 8.597444534301758, "learning_rate": 2.3145176387328736e-07, "loss": 0.3739, "step": 16290 }, { "epoch": 2.36, "grad_norm": 8.334718704223633, "learning_rate": 2.3135050559652658e-07, "loss": 0.3541, "step": 16291 }, { "epoch": 2.36, "grad_norm": 8.896841049194336, "learning_rate": 2.3124926657703036e-07, "loss": 0.3418, "step": 16292 }, { "epoch": 2.36, "grad_norm": 7.941078186035156, "learning_rate": 2.3114804681733402e-07, "loss": 0.3452, "step": 16293 }, { "epoch": 2.36, "grad_norm": 8.282013893127441, "learning_rate": 2.3104684631997473e-07, "loss": 0.374, "step": 16294 }, { "epoch": 2.36, "grad_norm": 8.624608039855957, "learning_rate": 2.3094566508748681e-07, "loss": 0.324, "step": 16295 }, { "epoch": 2.36, "grad_norm": 8.72913646697998, "learning_rate": 2.308445031224061e-07, "loss": 0.3518, "step": 16296 }, { "epoch": 2.36, "grad_norm": 7.528897285461426, "learning_rate": 2.307433604272664e-07, "loss": 0.3016, "step": 16297 }, { "epoch": 2.36, "grad_norm": 8.414656639099121, "learning_rate": 2.3064223700460184e-07, "loss": 0.3542, "step": 16298 }, { "epoch": 2.36, "grad_norm": 8.563711166381836, "learning_rate": 2.30541132856946e-07, "loss": 0.406, "step": 16299 }, { "epoch": 2.37, "grad_norm": 8.823265075683594, "learning_rate": 2.3044004798683182e-07, "loss": 0.381, "step": 16300 }, { "epoch": 2.37, "grad_norm": 8.45260238647461, "learning_rate": 2.3033898239679184e-07, "loss": 0.3858, "step": 16301 }, { "epoch": 2.37, "grad_norm": 9.010002136230469, "learning_rate": 2.3023793608935814e-07, "loss": 0.394, "step": 16302 }, { "epoch": 2.37, "grad_norm": 8.911786079406738, "learning_rate": 2.3013690906706217e-07, "loss": 0.3339, "step": 16303 }, { "epoch": 2.37, "grad_norm": 7.933607578277588, "learning_rate": 2.30035901332435e-07, "loss": 0.3514, "step": 16304 }, { "epoch": 2.37, "grad_norm": 8.791948318481445, "learning_rate": 2.2993491288800737e-07, "loss": 0.4263, "step": 16305 }, { "epoch": 2.37, "grad_norm": 9.453155517578125, "learning_rate": 2.2983394373630949e-07, "loss": 0.4024, "step": 16306 }, { "epoch": 2.37, "grad_norm": 7.924380302429199, "learning_rate": 2.2973299387987023e-07, "loss": 0.3302, "step": 16307 }, { "epoch": 2.37, "grad_norm": 8.149269104003906, "learning_rate": 2.2963206332121977e-07, "loss": 0.3454, "step": 16308 }, { "epoch": 2.37, "grad_norm": 8.062806129455566, "learning_rate": 2.2953115206288575e-07, "loss": 0.3431, "step": 16309 }, { "epoch": 2.37, "grad_norm": 8.51481819152832, "learning_rate": 2.294302601073973e-07, "loss": 0.3441, "step": 16310 }, { "epoch": 2.37, "grad_norm": 7.849857330322266, "learning_rate": 2.2932938745728114e-07, "loss": 0.3403, "step": 16311 }, { "epoch": 2.37, "grad_norm": 8.078876495361328, "learning_rate": 2.2922853411506559e-07, "loss": 0.3398, "step": 16312 }, { "epoch": 2.37, "grad_norm": 8.251852989196777, "learning_rate": 2.2912770008327632e-07, "loss": 0.3436, "step": 16313 }, { "epoch": 2.37, "grad_norm": 9.496435165405273, "learning_rate": 2.2902688536444014e-07, "loss": 0.3999, "step": 16314 }, { "epoch": 2.37, "grad_norm": 8.526312828063965, "learning_rate": 2.2892608996108255e-07, "loss": 0.3723, "step": 16315 }, { "epoch": 2.37, "grad_norm": 7.679638385772705, "learning_rate": 2.2882531387572869e-07, "loss": 0.317, "step": 16316 }, { "epoch": 2.37, "grad_norm": 8.557072639465332, "learning_rate": 2.287245571109041e-07, "loss": 0.3559, "step": 16317 }, { "epoch": 2.37, "grad_norm": 9.542192459106445, "learning_rate": 2.2862381966913223e-07, "loss": 0.4057, "step": 16318 }, { "epoch": 2.37, "grad_norm": 7.675724983215332, "learning_rate": 2.285231015529372e-07, "loss": 0.3904, "step": 16319 }, { "epoch": 2.37, "grad_norm": 7.990339756011963, "learning_rate": 2.2842240276484236e-07, "loss": 0.3716, "step": 16320 }, { "epoch": 2.37, "grad_norm": 7.993215084075928, "learning_rate": 2.2832172330737055e-07, "loss": 0.352, "step": 16321 }, { "epoch": 2.37, "grad_norm": 8.952374458312988, "learning_rate": 2.2822106318304412e-07, "loss": 0.3865, "step": 16322 }, { "epoch": 2.37, "grad_norm": 8.54818344116211, "learning_rate": 2.281204223943849e-07, "loss": 0.3616, "step": 16323 }, { "epoch": 2.37, "grad_norm": 9.033141136169434, "learning_rate": 2.2801980094391437e-07, "loss": 0.3386, "step": 16324 }, { "epoch": 2.37, "grad_norm": 9.776049613952637, "learning_rate": 2.2791919883415344e-07, "loss": 0.3446, "step": 16325 }, { "epoch": 2.37, "grad_norm": 9.941350936889648, "learning_rate": 2.2781861606762242e-07, "loss": 0.3851, "step": 16326 }, { "epoch": 2.37, "grad_norm": 11.059444427490234, "learning_rate": 2.2771805264684164e-07, "loss": 0.3843, "step": 16327 }, { "epoch": 2.37, "grad_norm": 9.147468566894531, "learning_rate": 2.276175085743296e-07, "loss": 0.3335, "step": 16328 }, { "epoch": 2.37, "grad_norm": 9.101420402526855, "learning_rate": 2.2751698385260643e-07, "loss": 0.3602, "step": 16329 }, { "epoch": 2.37, "grad_norm": 9.144222259521484, "learning_rate": 2.2741647848418954e-07, "loss": 0.3602, "step": 16330 }, { "epoch": 2.37, "grad_norm": 8.527280807495117, "learning_rate": 2.2731599247159805e-07, "loss": 0.3893, "step": 16331 }, { "epoch": 2.37, "grad_norm": 8.416847229003906, "learning_rate": 2.2721552581734837e-07, "loss": 0.3681, "step": 16332 }, { "epoch": 2.37, "grad_norm": 8.467269897460938, "learning_rate": 2.271150785239585e-07, "loss": 0.338, "step": 16333 }, { "epoch": 2.37, "grad_norm": 8.326586723327637, "learning_rate": 2.2701465059394432e-07, "loss": 0.3144, "step": 16334 }, { "epoch": 2.37, "grad_norm": 8.78797435760498, "learning_rate": 2.2691424202982212e-07, "loss": 0.3759, "step": 16335 }, { "epoch": 2.37, "grad_norm": 9.224249839782715, "learning_rate": 2.268138528341075e-07, "loss": 0.3448, "step": 16336 }, { "epoch": 2.37, "grad_norm": 9.167381286621094, "learning_rate": 2.2671348300931536e-07, "loss": 0.3398, "step": 16337 }, { "epoch": 2.37, "grad_norm": 7.547333240509033, "learning_rate": 2.2661313255796056e-07, "loss": 0.3142, "step": 16338 }, { "epoch": 2.37, "grad_norm": 9.446465492248535, "learning_rate": 2.2651280148255703e-07, "loss": 0.341, "step": 16339 }, { "epoch": 2.37, "grad_norm": 9.32198715209961, "learning_rate": 2.2641248978561856e-07, "loss": 0.3889, "step": 16340 }, { "epoch": 2.37, "grad_norm": 8.404889106750488, "learning_rate": 2.2631219746965814e-07, "loss": 0.38, "step": 16341 }, { "epoch": 2.37, "grad_norm": 8.231451034545898, "learning_rate": 2.262119245371885e-07, "loss": 0.3729, "step": 16342 }, { "epoch": 2.37, "grad_norm": 9.929405212402344, "learning_rate": 2.2611167099072182e-07, "loss": 0.3747, "step": 16343 }, { "epoch": 2.37, "grad_norm": 8.035998344421387, "learning_rate": 2.260114368327698e-07, "loss": 0.3403, "step": 16344 }, { "epoch": 2.37, "grad_norm": 8.964194297790527, "learning_rate": 2.2591122206584368e-07, "loss": 0.3359, "step": 16345 }, { "epoch": 2.37, "grad_norm": 8.669681549072266, "learning_rate": 2.25811026692454e-07, "loss": 0.358, "step": 16346 }, { "epoch": 2.37, "grad_norm": 8.628530502319336, "learning_rate": 2.257108507151112e-07, "loss": 0.3112, "step": 16347 }, { "epoch": 2.37, "grad_norm": 7.3969950675964355, "learning_rate": 2.2561069413632526e-07, "loss": 0.3008, "step": 16348 }, { "epoch": 2.37, "grad_norm": 8.817646980285645, "learning_rate": 2.2551055695860454e-07, "loss": 0.36, "step": 16349 }, { "epoch": 2.37, "grad_norm": 8.589139938354492, "learning_rate": 2.25410439184459e-07, "loss": 0.3073, "step": 16350 }, { "epoch": 2.37, "grad_norm": 8.896947860717773, "learning_rate": 2.2531034081639567e-07, "loss": 0.3495, "step": 16351 }, { "epoch": 2.37, "grad_norm": 11.150468826293945, "learning_rate": 2.252102618569236e-07, "loss": 0.3172, "step": 16352 }, { "epoch": 2.37, "grad_norm": 8.697052955627441, "learning_rate": 2.2511020230854927e-07, "loss": 0.3113, "step": 16353 }, { "epoch": 2.37, "grad_norm": 8.227383613586426, "learning_rate": 2.2501016217377977e-07, "loss": 0.2949, "step": 16354 }, { "epoch": 2.37, "grad_norm": 9.733920097351074, "learning_rate": 2.2491014145512145e-07, "loss": 0.3993, "step": 16355 }, { "epoch": 2.37, "grad_norm": 8.122160911560059, "learning_rate": 2.2481014015508016e-07, "loss": 0.3505, "step": 16356 }, { "epoch": 2.37, "grad_norm": 10.127805709838867, "learning_rate": 2.2471015827616114e-07, "loss": 0.4187, "step": 16357 }, { "epoch": 2.37, "grad_norm": 8.2211332321167, "learning_rate": 2.246101958208695e-07, "loss": 0.3453, "step": 16358 }, { "epoch": 2.37, "grad_norm": 8.208657264709473, "learning_rate": 2.2451025279170955e-07, "loss": 0.3276, "step": 16359 }, { "epoch": 2.37, "grad_norm": 7.825013637542725, "learning_rate": 2.244103291911851e-07, "loss": 0.349, "step": 16360 }, { "epoch": 2.37, "grad_norm": 9.428936004638672, "learning_rate": 2.2431042502179964e-07, "loss": 0.3699, "step": 16361 }, { "epoch": 2.37, "grad_norm": 7.492372035980225, "learning_rate": 2.2421054028605647e-07, "loss": 0.2977, "step": 16362 }, { "epoch": 2.37, "grad_norm": 9.131292343139648, "learning_rate": 2.2411067498645708e-07, "loss": 0.3747, "step": 16363 }, { "epoch": 2.37, "grad_norm": 8.47329330444336, "learning_rate": 2.2401082912550463e-07, "loss": 0.3742, "step": 16364 }, { "epoch": 2.37, "grad_norm": 8.455909729003906, "learning_rate": 2.239110027056994e-07, "loss": 0.3222, "step": 16365 }, { "epoch": 2.37, "grad_norm": 9.087706565856934, "learning_rate": 2.2381119572954353e-07, "loss": 0.3603, "step": 16366 }, { "epoch": 2.37, "grad_norm": 8.214860916137695, "learning_rate": 2.2371140819953649e-07, "loss": 0.3207, "step": 16367 }, { "epoch": 2.37, "grad_norm": 8.531004905700684, "learning_rate": 2.23611640118179e-07, "loss": 0.3267, "step": 16368 }, { "epoch": 2.38, "grad_norm": 8.392465591430664, "learning_rate": 2.2351189148797068e-07, "loss": 0.3494, "step": 16369 }, { "epoch": 2.38, "grad_norm": 9.415353775024414, "learning_rate": 2.234121623114097e-07, "loss": 0.36, "step": 16370 }, { "epoch": 2.38, "grad_norm": 9.031339645385742, "learning_rate": 2.2331245259099573e-07, "loss": 0.323, "step": 16371 }, { "epoch": 2.38, "grad_norm": 7.423157691955566, "learning_rate": 2.232127623292257e-07, "loss": 0.3112, "step": 16372 }, { "epoch": 2.38, "grad_norm": 9.57886791229248, "learning_rate": 2.2311309152859836e-07, "loss": 0.4226, "step": 16373 }, { "epoch": 2.38, "grad_norm": 8.601959228515625, "learning_rate": 2.2301344019160983e-07, "loss": 0.3432, "step": 16374 }, { "epoch": 2.38, "grad_norm": 8.225706100463867, "learning_rate": 2.2291380832075724e-07, "loss": 0.3091, "step": 16375 }, { "epoch": 2.38, "grad_norm": 7.9147419929504395, "learning_rate": 2.2281419591853644e-07, "loss": 0.3541, "step": 16376 }, { "epoch": 2.38, "grad_norm": 7.588927745819092, "learning_rate": 2.2271460298744317e-07, "loss": 0.3153, "step": 16377 }, { "epoch": 2.38, "grad_norm": 10.623414993286133, "learning_rate": 2.2261502952997256e-07, "loss": 0.4288, "step": 16378 }, { "epoch": 2.38, "grad_norm": 8.159727096557617, "learning_rate": 2.2251547554861938e-07, "loss": 0.3984, "step": 16379 }, { "epoch": 2.38, "grad_norm": 9.477229118347168, "learning_rate": 2.2241594104587758e-07, "loss": 0.384, "step": 16380 }, { "epoch": 2.38, "grad_norm": 9.109763145446777, "learning_rate": 2.2231642602424095e-07, "loss": 0.3312, "step": 16381 }, { "epoch": 2.38, "grad_norm": 8.722047805786133, "learning_rate": 2.222169304862026e-07, "loss": 0.391, "step": 16382 }, { "epoch": 2.38, "grad_norm": 7.401651859283447, "learning_rate": 2.2211745443425567e-07, "loss": 0.3046, "step": 16383 }, { "epoch": 2.38, "grad_norm": 10.05452823638916, "learning_rate": 2.2201799787089136e-07, "loss": 0.4093, "step": 16384 }, { "epoch": 2.38, "grad_norm": 7.845113754272461, "learning_rate": 2.219185607986027e-07, "loss": 0.3219, "step": 16385 }, { "epoch": 2.38, "grad_norm": 7.863786697387695, "learning_rate": 2.2181914321987961e-07, "loss": 0.3151, "step": 16386 }, { "epoch": 2.38, "grad_norm": 8.261112213134766, "learning_rate": 2.2171974513721402e-07, "loss": 0.3343, "step": 16387 }, { "epoch": 2.38, "grad_norm": 8.653651237487793, "learning_rate": 2.2162036655309536e-07, "loss": 0.3467, "step": 16388 }, { "epoch": 2.38, "grad_norm": 9.086587905883789, "learning_rate": 2.2152100747001367e-07, "loss": 0.3418, "step": 16389 }, { "epoch": 2.38, "grad_norm": 7.44246768951416, "learning_rate": 2.2142166789045836e-07, "loss": 0.3194, "step": 16390 }, { "epoch": 2.38, "grad_norm": 8.387954711914062, "learning_rate": 2.2132234781691806e-07, "loss": 0.3782, "step": 16391 }, { "epoch": 2.38, "grad_norm": 8.222331047058105, "learning_rate": 2.21223047251881e-07, "loss": 0.3515, "step": 16392 }, { "epoch": 2.38, "grad_norm": 8.786778450012207, "learning_rate": 2.2112376619783523e-07, "loss": 0.3536, "step": 16393 }, { "epoch": 2.38, "grad_norm": 9.174691200256348, "learning_rate": 2.2102450465726785e-07, "loss": 0.3154, "step": 16394 }, { "epoch": 2.38, "grad_norm": 8.504730224609375, "learning_rate": 2.2092526263266587e-07, "loss": 0.3461, "step": 16395 }, { "epoch": 2.38, "grad_norm": 7.906966686248779, "learning_rate": 2.2082604012651563e-07, "loss": 0.357, "step": 16396 }, { "epoch": 2.38, "grad_norm": 8.457464218139648, "learning_rate": 2.2072683714130292e-07, "loss": 0.3456, "step": 16397 }, { "epoch": 2.38, "grad_norm": 7.871398448944092, "learning_rate": 2.2062765367951296e-07, "loss": 0.3631, "step": 16398 }, { "epoch": 2.38, "grad_norm": 8.190930366516113, "learning_rate": 2.2052848974363093e-07, "loss": 0.3267, "step": 16399 }, { "epoch": 2.38, "grad_norm": 8.423611640930176, "learning_rate": 2.204293453361411e-07, "loss": 0.3252, "step": 16400 }, { "epoch": 2.38, "grad_norm": 9.18637466430664, "learning_rate": 2.203302204595273e-07, "loss": 0.39, "step": 16401 }, { "epoch": 2.38, "grad_norm": 8.11488151550293, "learning_rate": 2.20231115116273e-07, "loss": 0.355, "step": 16402 }, { "epoch": 2.38, "grad_norm": 8.900459289550781, "learning_rate": 2.201320293088611e-07, "loss": 0.3474, "step": 16403 }, { "epoch": 2.38, "grad_norm": 8.236082077026367, "learning_rate": 2.2003296303977436e-07, "loss": 0.3365, "step": 16404 }, { "epoch": 2.38, "grad_norm": 9.850357055664062, "learning_rate": 2.1993391631149383e-07, "loss": 0.4038, "step": 16405 }, { "epoch": 2.38, "grad_norm": 10.164751052856445, "learning_rate": 2.1983488912650218e-07, "loss": 0.3967, "step": 16406 }, { "epoch": 2.38, "grad_norm": 8.449658393859863, "learning_rate": 2.1973588148727918e-07, "loss": 0.3611, "step": 16407 }, { "epoch": 2.38, "grad_norm": 8.189579963684082, "learning_rate": 2.1963689339630642e-07, "loss": 0.3424, "step": 16408 }, { "epoch": 2.38, "grad_norm": 9.49251651763916, "learning_rate": 2.1953792485606304e-07, "loss": 0.3919, "step": 16409 }, { "epoch": 2.38, "grad_norm": 7.434010982513428, "learning_rate": 2.1943897586902883e-07, "loss": 0.3114, "step": 16410 }, { "epoch": 2.38, "grad_norm": 9.735777854919434, "learning_rate": 2.1934004643768277e-07, "loss": 0.4185, "step": 16411 }, { "epoch": 2.38, "grad_norm": 7.721217155456543, "learning_rate": 2.1924113656450338e-07, "loss": 0.2656, "step": 16412 }, { "epoch": 2.38, "grad_norm": 9.047133445739746, "learning_rate": 2.1914224625196886e-07, "loss": 0.3305, "step": 16413 }, { "epoch": 2.38, "grad_norm": 8.81106948852539, "learning_rate": 2.1904337550255647e-07, "loss": 0.2851, "step": 16414 }, { "epoch": 2.38, "grad_norm": 8.263090133666992, "learning_rate": 2.1894452431874345e-07, "loss": 0.3186, "step": 16415 }, { "epoch": 2.38, "grad_norm": 7.636951923370361, "learning_rate": 2.1884569270300624e-07, "loss": 0.4057, "step": 16416 }, { "epoch": 2.38, "grad_norm": 8.323488235473633, "learning_rate": 2.18746880657821e-07, "loss": 0.3755, "step": 16417 }, { "epoch": 2.38, "grad_norm": 8.952743530273438, "learning_rate": 2.186480881856636e-07, "loss": 0.3436, "step": 16418 }, { "epoch": 2.38, "grad_norm": 9.53447151184082, "learning_rate": 2.185493152890081e-07, "loss": 0.3146, "step": 16419 }, { "epoch": 2.38, "grad_norm": 9.006464004516602, "learning_rate": 2.1845056197033018e-07, "loss": 0.3621, "step": 16420 }, { "epoch": 2.38, "grad_norm": 9.369039535522461, "learning_rate": 2.1835182823210342e-07, "loss": 0.3466, "step": 16421 }, { "epoch": 2.38, "grad_norm": 9.315408706665039, "learning_rate": 2.182531140768017e-07, "loss": 0.384, "step": 16422 }, { "epoch": 2.38, "grad_norm": 7.38009786605835, "learning_rate": 2.1815441950689783e-07, "loss": 0.318, "step": 16423 }, { "epoch": 2.38, "grad_norm": 8.345431327819824, "learning_rate": 2.1805574452486476e-07, "loss": 0.3416, "step": 16424 }, { "epoch": 2.38, "grad_norm": 8.702622413635254, "learning_rate": 2.1795708913317468e-07, "loss": 0.3918, "step": 16425 }, { "epoch": 2.38, "grad_norm": 10.025197982788086, "learning_rate": 2.1785845333429854e-07, "loss": 0.3641, "step": 16426 }, { "epoch": 2.38, "grad_norm": 9.589873313903809, "learning_rate": 2.1775983713070857e-07, "loss": 0.3502, "step": 16427 }, { "epoch": 2.38, "grad_norm": 8.615982055664062, "learning_rate": 2.1766124052487434e-07, "loss": 0.3714, "step": 16428 }, { "epoch": 2.38, "grad_norm": 11.14565658569336, "learning_rate": 2.17562663519267e-07, "loss": 0.4342, "step": 16429 }, { "epoch": 2.38, "grad_norm": 7.750438690185547, "learning_rate": 2.1746410611635557e-07, "loss": 0.3005, "step": 16430 }, { "epoch": 2.38, "grad_norm": 9.045889854431152, "learning_rate": 2.1736556831860952e-07, "loss": 0.3678, "step": 16431 }, { "epoch": 2.38, "grad_norm": 8.194576263427734, "learning_rate": 2.1726705012849755e-07, "loss": 0.3263, "step": 16432 }, { "epoch": 2.38, "grad_norm": 9.469276428222656, "learning_rate": 2.1716855154848768e-07, "loss": 0.3556, "step": 16433 }, { "epoch": 2.38, "grad_norm": 8.864192962646484, "learning_rate": 2.1707007258104792e-07, "loss": 0.4072, "step": 16434 }, { "epoch": 2.38, "grad_norm": 8.334941864013672, "learning_rate": 2.169716132286453e-07, "loss": 0.3459, "step": 16435 }, { "epoch": 2.38, "grad_norm": 8.830384254455566, "learning_rate": 2.1687317349374668e-07, "loss": 0.344, "step": 16436 }, { "epoch": 2.38, "grad_norm": 8.570765495300293, "learning_rate": 2.1677475337881813e-07, "loss": 0.3489, "step": 16437 }, { "epoch": 2.39, "grad_norm": 14.668749809265137, "learning_rate": 2.1667635288632567e-07, "loss": 0.3558, "step": 16438 }, { "epoch": 2.39, "grad_norm": 7.453652858734131, "learning_rate": 2.1657797201873462e-07, "loss": 0.2908, "step": 16439 }, { "epoch": 2.39, "grad_norm": 8.486787796020508, "learning_rate": 2.1647961077850897e-07, "loss": 0.367, "step": 16440 }, { "epoch": 2.39, "grad_norm": 8.65507984161377, "learning_rate": 2.1638126916811416e-07, "loss": 0.3642, "step": 16441 }, { "epoch": 2.39, "grad_norm": 8.528425216674805, "learning_rate": 2.1628294719001294e-07, "loss": 0.3821, "step": 16442 }, { "epoch": 2.39, "grad_norm": 8.796679496765137, "learning_rate": 2.1618464484666955e-07, "loss": 0.3567, "step": 16443 }, { "epoch": 2.39, "grad_norm": 8.171314239501953, "learning_rate": 2.16086362140546e-07, "loss": 0.3746, "step": 16444 }, { "epoch": 2.39, "grad_norm": 7.760627746582031, "learning_rate": 2.1598809907410498e-07, "loss": 0.3184, "step": 16445 }, { "epoch": 2.39, "grad_norm": 8.977507591247559, "learning_rate": 2.1588985564980833e-07, "loss": 0.3935, "step": 16446 }, { "epoch": 2.39, "grad_norm": 9.94063663482666, "learning_rate": 2.1579163187011683e-07, "loss": 0.4154, "step": 16447 }, { "epoch": 2.39, "grad_norm": 7.8565874099731445, "learning_rate": 2.1569342773749254e-07, "loss": 0.3467, "step": 16448 }, { "epoch": 2.39, "grad_norm": 7.690961837768555, "learning_rate": 2.1559524325439438e-07, "loss": 0.3488, "step": 16449 }, { "epoch": 2.39, "grad_norm": 8.077354431152344, "learning_rate": 2.1549707842328347e-07, "loss": 0.3195, "step": 16450 }, { "epoch": 2.39, "grad_norm": 7.7052693367004395, "learning_rate": 2.1539893324661828e-07, "loss": 0.3335, "step": 16451 }, { "epoch": 2.39, "grad_norm": 8.405394554138184, "learning_rate": 2.15300807726858e-07, "loss": 0.3191, "step": 16452 }, { "epoch": 2.39, "grad_norm": 7.5733184814453125, "learning_rate": 2.1520270186646094e-07, "loss": 0.301, "step": 16453 }, { "epoch": 2.39, "grad_norm": 8.306865692138672, "learning_rate": 2.1510461566788508e-07, "loss": 0.3451, "step": 16454 }, { "epoch": 2.39, "grad_norm": 9.050064086914062, "learning_rate": 2.1500654913358773e-07, "loss": 0.3685, "step": 16455 }, { "epoch": 2.39, "grad_norm": 8.53612995147705, "learning_rate": 2.1490850226602576e-07, "loss": 0.4047, "step": 16456 }, { "epoch": 2.39, "grad_norm": 8.602662086486816, "learning_rate": 2.148104750676557e-07, "loss": 0.3413, "step": 16457 }, { "epoch": 2.39, "grad_norm": 7.644983291625977, "learning_rate": 2.1471246754093332e-07, "loss": 0.324, "step": 16458 }, { "epoch": 2.39, "grad_norm": 10.757277488708496, "learning_rate": 2.1461447968831415e-07, "loss": 0.406, "step": 16459 }, { "epoch": 2.39, "grad_norm": 9.349442481994629, "learning_rate": 2.1451651151225335e-07, "loss": 0.3481, "step": 16460 }, { "epoch": 2.39, "grad_norm": 8.180890083312988, "learning_rate": 2.1441856301520454e-07, "loss": 0.3721, "step": 16461 }, { "epoch": 2.39, "grad_norm": 8.75586986541748, "learning_rate": 2.1432063419962277e-07, "loss": 0.3296, "step": 16462 }, { "epoch": 2.39, "grad_norm": 8.557836532592773, "learning_rate": 2.1422272506796024e-07, "loss": 0.3667, "step": 16463 }, { "epoch": 2.39, "grad_norm": 9.141493797302246, "learning_rate": 2.1412483562267126e-07, "loss": 0.3969, "step": 16464 }, { "epoch": 2.39, "grad_norm": 10.041230201721191, "learning_rate": 2.1402696586620728e-07, "loss": 0.3657, "step": 16465 }, { "epoch": 2.39, "grad_norm": 7.691375732421875, "learning_rate": 2.1392911580102059e-07, "loss": 0.327, "step": 16466 }, { "epoch": 2.39, "grad_norm": 8.831114768981934, "learning_rate": 2.1383128542956276e-07, "loss": 0.3843, "step": 16467 }, { "epoch": 2.39, "grad_norm": 7.884493350982666, "learning_rate": 2.137334747542846e-07, "loss": 0.3325, "step": 16468 }, { "epoch": 2.39, "grad_norm": 8.338430404663086, "learning_rate": 2.1363568377763663e-07, "loss": 0.3757, "step": 16469 }, { "epoch": 2.39, "grad_norm": 7.6066975593566895, "learning_rate": 2.1353791250206897e-07, "loss": 0.3299, "step": 16470 }, { "epoch": 2.39, "grad_norm": 8.413636207580566, "learning_rate": 2.1344016093003104e-07, "loss": 0.363, "step": 16471 }, { "epoch": 2.39, "grad_norm": 7.421097755432129, "learning_rate": 2.1334242906397193e-07, "loss": 0.3183, "step": 16472 }, { "epoch": 2.39, "grad_norm": 7.964674472808838, "learning_rate": 2.1324471690633995e-07, "loss": 0.3737, "step": 16473 }, { "epoch": 2.39, "grad_norm": 8.529491424560547, "learning_rate": 2.1314702445958333e-07, "loss": 0.3546, "step": 16474 }, { "epoch": 2.39, "grad_norm": 8.25613784790039, "learning_rate": 2.1304935172614945e-07, "loss": 0.2955, "step": 16475 }, { "epoch": 2.39, "grad_norm": 8.361870765686035, "learning_rate": 2.1295169870848539e-07, "loss": 0.3372, "step": 16476 }, { "epoch": 2.39, "grad_norm": 8.309642791748047, "learning_rate": 2.1285406540903772e-07, "loss": 0.3399, "step": 16477 }, { "epoch": 2.39, "grad_norm": 8.535435676574707, "learning_rate": 2.1275645183025248e-07, "loss": 0.3873, "step": 16478 }, { "epoch": 2.39, "grad_norm": 8.125800132751465, "learning_rate": 2.126588579745755e-07, "loss": 0.3473, "step": 16479 }, { "epoch": 2.39, "grad_norm": 8.238713264465332, "learning_rate": 2.1256128384445094e-07, "loss": 0.3258, "step": 16480 }, { "epoch": 2.39, "grad_norm": 8.702709197998047, "learning_rate": 2.1246372944232461e-07, "loss": 0.407, "step": 16481 }, { "epoch": 2.39, "grad_norm": 8.536480903625488, "learning_rate": 2.1236619477063923e-07, "loss": 0.3829, "step": 16482 }, { "epoch": 2.39, "grad_norm": 7.603716850280762, "learning_rate": 2.1226867983183972e-07, "loss": 0.36, "step": 16483 }, { "epoch": 2.39, "grad_norm": 9.045105934143066, "learning_rate": 2.121711846283679e-07, "loss": 0.3248, "step": 16484 }, { "epoch": 2.39, "grad_norm": 8.336276054382324, "learning_rate": 2.1207370916266754e-07, "loss": 0.3223, "step": 16485 }, { "epoch": 2.39, "grad_norm": 8.300760269165039, "learning_rate": 2.1197625343717984e-07, "loss": 0.3515, "step": 16486 }, { "epoch": 2.39, "grad_norm": 8.151969909667969, "learning_rate": 2.1187881745434676e-07, "loss": 0.3702, "step": 16487 }, { "epoch": 2.39, "grad_norm": 9.114928245544434, "learning_rate": 2.1178140121660936e-07, "loss": 0.4022, "step": 16488 }, { "epoch": 2.39, "grad_norm": 8.462237358093262, "learning_rate": 2.1168400472640825e-07, "loss": 0.3128, "step": 16489 }, { "epoch": 2.39, "grad_norm": 8.460206031799316, "learning_rate": 2.1158662798618344e-07, "loss": 0.3872, "step": 16490 }, { "epoch": 2.39, "grad_norm": 9.288129806518555, "learning_rate": 2.1148927099837465e-07, "loss": 0.3713, "step": 16491 }, { "epoch": 2.39, "grad_norm": 8.299436569213867, "learning_rate": 2.1139193376542097e-07, "loss": 0.3563, "step": 16492 }, { "epoch": 2.39, "grad_norm": 9.884912490844727, "learning_rate": 2.1129461628976107e-07, "loss": 0.3954, "step": 16493 }, { "epoch": 2.39, "grad_norm": 8.834782600402832, "learning_rate": 2.1119731857383306e-07, "loss": 0.3907, "step": 16494 }, { "epoch": 2.39, "grad_norm": 8.38166332244873, "learning_rate": 2.111000406200748e-07, "loss": 0.316, "step": 16495 }, { "epoch": 2.39, "grad_norm": 8.28315544128418, "learning_rate": 2.1100278243092262e-07, "loss": 0.3418, "step": 16496 }, { "epoch": 2.39, "grad_norm": 8.557209968566895, "learning_rate": 2.1090554400881434e-07, "loss": 0.3949, "step": 16497 }, { "epoch": 2.39, "grad_norm": 7.559431552886963, "learning_rate": 2.1080832535618498e-07, "loss": 0.3086, "step": 16498 }, { "epoch": 2.39, "grad_norm": 8.817828178405762, "learning_rate": 2.1071112647547095e-07, "loss": 0.3277, "step": 16499 }, { "epoch": 2.39, "grad_norm": 8.748130798339844, "learning_rate": 2.1061394736910753e-07, "loss": 0.3175, "step": 16500 }, { "epoch": 2.39, "grad_norm": 8.520998001098633, "learning_rate": 2.1051678803952844e-07, "loss": 0.3714, "step": 16501 }, { "epoch": 2.39, "grad_norm": 7.538527488708496, "learning_rate": 2.1041964848916904e-07, "loss": 0.346, "step": 16502 }, { "epoch": 2.39, "grad_norm": 9.49951171875, "learning_rate": 2.1032252872046175e-07, "loss": 0.4225, "step": 16503 }, { "epoch": 2.39, "grad_norm": 8.858929634094238, "learning_rate": 2.1022542873584104e-07, "loss": 0.3799, "step": 16504 }, { "epoch": 2.39, "grad_norm": 8.98426628112793, "learning_rate": 2.1012834853773842e-07, "loss": 0.3999, "step": 16505 }, { "epoch": 2.39, "grad_norm": 8.681769371032715, "learning_rate": 2.1003128812858717e-07, "loss": 0.3553, "step": 16506 }, { "epoch": 2.4, "grad_norm": 7.345857620239258, "learning_rate": 2.0993424751081823e-07, "loss": 0.3104, "step": 16507 }, { "epoch": 2.4, "grad_norm": 7.489382743835449, "learning_rate": 2.0983722668686288e-07, "loss": 0.3316, "step": 16508 }, { "epoch": 2.4, "grad_norm": 8.586915969848633, "learning_rate": 2.0974022565915195e-07, "loss": 0.3703, "step": 16509 }, { "epoch": 2.4, "grad_norm": 9.10700798034668, "learning_rate": 2.096432444301157e-07, "loss": 0.3869, "step": 16510 }, { "epoch": 2.4, "grad_norm": 8.7770414352417, "learning_rate": 2.095462830021837e-07, "loss": 0.3708, "step": 16511 }, { "epoch": 2.4, "grad_norm": 9.124751091003418, "learning_rate": 2.0944934137778525e-07, "loss": 0.3845, "step": 16512 }, { "epoch": 2.4, "grad_norm": 8.433202743530273, "learning_rate": 2.0935241955934902e-07, "loss": 0.3572, "step": 16513 }, { "epoch": 2.4, "grad_norm": 8.986360549926758, "learning_rate": 2.0925551754930327e-07, "loss": 0.3305, "step": 16514 }, { "epoch": 2.4, "grad_norm": 9.1826171875, "learning_rate": 2.0915863535007573e-07, "loss": 0.3738, "step": 16515 }, { "epoch": 2.4, "grad_norm": 9.91230583190918, "learning_rate": 2.090617729640939e-07, "loss": 0.4104, "step": 16516 }, { "epoch": 2.4, "grad_norm": 8.62595272064209, "learning_rate": 2.089649303937836e-07, "loss": 0.3706, "step": 16517 }, { "epoch": 2.4, "grad_norm": 9.252689361572266, "learning_rate": 2.0886810764157214e-07, "loss": 0.4612, "step": 16518 }, { "epoch": 2.4, "grad_norm": 8.532537460327148, "learning_rate": 2.0877130470988436e-07, "loss": 0.3698, "step": 16519 }, { "epoch": 2.4, "grad_norm": 8.394865036010742, "learning_rate": 2.086745216011464e-07, "loss": 0.3262, "step": 16520 }, { "epoch": 2.4, "grad_norm": 8.761405944824219, "learning_rate": 2.0857775831778236e-07, "loss": 0.3309, "step": 16521 }, { "epoch": 2.4, "grad_norm": 8.2987699508667, "learning_rate": 2.084810148622166e-07, "loss": 0.3598, "step": 16522 }, { "epoch": 2.4, "grad_norm": 9.445357322692871, "learning_rate": 2.0838429123687297e-07, "loss": 0.3558, "step": 16523 }, { "epoch": 2.4, "grad_norm": 9.869637489318848, "learning_rate": 2.0828758744417429e-07, "loss": 0.3412, "step": 16524 }, { "epoch": 2.4, "grad_norm": 8.648916244506836, "learning_rate": 2.0819090348654444e-07, "loss": 0.3406, "step": 16525 }, { "epoch": 2.4, "grad_norm": 7.288173198699951, "learning_rate": 2.0809423936640457e-07, "loss": 0.327, "step": 16526 }, { "epoch": 2.4, "grad_norm": 9.628667831420898, "learning_rate": 2.0799759508617676e-07, "loss": 0.3169, "step": 16527 }, { "epoch": 2.4, "grad_norm": 8.098291397094727, "learning_rate": 2.0790097064828238e-07, "loss": 0.3235, "step": 16528 }, { "epoch": 2.4, "grad_norm": 9.551055908203125, "learning_rate": 2.0780436605514217e-07, "loss": 0.3763, "step": 16529 }, { "epoch": 2.4, "grad_norm": 7.687042236328125, "learning_rate": 2.0770778130917644e-07, "loss": 0.3508, "step": 16530 }, { "epoch": 2.4, "grad_norm": 10.01806640625, "learning_rate": 2.0761121641280488e-07, "loss": 0.4118, "step": 16531 }, { "epoch": 2.4, "grad_norm": 9.66785717010498, "learning_rate": 2.075146713684468e-07, "loss": 0.3355, "step": 16532 }, { "epoch": 2.4, "grad_norm": 6.681211948394775, "learning_rate": 2.0741814617852094e-07, "loss": 0.2755, "step": 16533 }, { "epoch": 2.4, "grad_norm": 7.8192572593688965, "learning_rate": 2.0732164084544569e-07, "loss": 0.3196, "step": 16534 }, { "epoch": 2.4, "grad_norm": 9.04145622253418, "learning_rate": 2.0722515537163887e-07, "loss": 0.3822, "step": 16535 }, { "epoch": 2.4, "grad_norm": 8.99181079864502, "learning_rate": 2.0712868975951725e-07, "loss": 0.3517, "step": 16536 }, { "epoch": 2.4, "grad_norm": 11.180892944335938, "learning_rate": 2.0703224401149853e-07, "loss": 0.4158, "step": 16537 }, { "epoch": 2.4, "grad_norm": 8.53264331817627, "learning_rate": 2.0693581812999795e-07, "loss": 0.3437, "step": 16538 }, { "epoch": 2.4, "grad_norm": 7.607216835021973, "learning_rate": 2.068394121174325e-07, "loss": 0.2825, "step": 16539 }, { "epoch": 2.4, "grad_norm": 8.50400447845459, "learning_rate": 2.067430259762162e-07, "loss": 0.3466, "step": 16540 }, { "epoch": 2.4, "grad_norm": 7.926458358764648, "learning_rate": 2.0664665970876495e-07, "loss": 0.3592, "step": 16541 }, { "epoch": 2.4, "grad_norm": 8.434796333312988, "learning_rate": 2.065503133174924e-07, "loss": 0.3699, "step": 16542 }, { "epoch": 2.4, "grad_norm": 8.54517650604248, "learning_rate": 2.0645398680481252e-07, "loss": 0.3398, "step": 16543 }, { "epoch": 2.4, "grad_norm": 8.41140079498291, "learning_rate": 2.063576801731386e-07, "loss": 0.3151, "step": 16544 }, { "epoch": 2.4, "grad_norm": 8.231185913085938, "learning_rate": 2.0626139342488347e-07, "loss": 0.3719, "step": 16545 }, { "epoch": 2.4, "grad_norm": 9.284239768981934, "learning_rate": 2.0616512656245944e-07, "loss": 0.4192, "step": 16546 }, { "epoch": 2.4, "grad_norm": 8.397459030151367, "learning_rate": 2.0606887958827833e-07, "loss": 0.3546, "step": 16547 }, { "epoch": 2.4, "grad_norm": 9.324106216430664, "learning_rate": 2.0597265250475148e-07, "loss": 0.368, "step": 16548 }, { "epoch": 2.4, "grad_norm": 8.216859817504883, "learning_rate": 2.058764453142896e-07, "loss": 0.3322, "step": 16549 }, { "epoch": 2.4, "grad_norm": 8.66543960571289, "learning_rate": 2.057802580193031e-07, "loss": 0.3464, "step": 16550 }, { "epoch": 2.4, "grad_norm": 8.234881401062012, "learning_rate": 2.0568409062220172e-07, "loss": 0.3711, "step": 16551 }, { "epoch": 2.4, "grad_norm": 8.654838562011719, "learning_rate": 2.0558794312539484e-07, "loss": 0.3713, "step": 16552 }, { "epoch": 2.4, "grad_norm": 9.297422409057617, "learning_rate": 2.0549181553129124e-07, "loss": 0.3486, "step": 16553 }, { "epoch": 2.4, "grad_norm": 10.547082901000977, "learning_rate": 2.0539570784229932e-07, "loss": 0.4661, "step": 16554 }, { "epoch": 2.4, "grad_norm": 9.07406997680664, "learning_rate": 2.052996200608268e-07, "loss": 0.3689, "step": 16555 }, { "epoch": 2.4, "grad_norm": 8.371225357055664, "learning_rate": 2.0520355218928143e-07, "loss": 0.3514, "step": 16556 }, { "epoch": 2.4, "grad_norm": 9.13329029083252, "learning_rate": 2.0510750423006896e-07, "loss": 0.3587, "step": 16557 }, { "epoch": 2.4, "grad_norm": 8.066433906555176, "learning_rate": 2.05011476185597e-07, "loss": 0.3213, "step": 16558 }, { "epoch": 2.4, "grad_norm": 7.832482814788818, "learning_rate": 2.049154680582702e-07, "loss": 0.3325, "step": 16559 }, { "epoch": 2.4, "grad_norm": 8.694513320922852, "learning_rate": 2.0481947985049508e-07, "loss": 0.3907, "step": 16560 }, { "epoch": 2.4, "grad_norm": 9.014179229736328, "learning_rate": 2.047235115646754e-07, "loss": 0.3846, "step": 16561 }, { "epoch": 2.4, "grad_norm": 9.88281536102295, "learning_rate": 2.0462756320321628e-07, "loss": 0.455, "step": 16562 }, { "epoch": 2.4, "grad_norm": 7.059006214141846, "learning_rate": 2.0453163476852108e-07, "loss": 0.3295, "step": 16563 }, { "epoch": 2.4, "grad_norm": 9.710428237915039, "learning_rate": 2.0443572626299323e-07, "loss": 0.332, "step": 16564 }, { "epoch": 2.4, "grad_norm": 7.899786472320557, "learning_rate": 2.0433983768903563e-07, "loss": 0.3403, "step": 16565 }, { "epoch": 2.4, "grad_norm": 9.235027313232422, "learning_rate": 2.0424396904905062e-07, "loss": 0.3895, "step": 16566 }, { "epoch": 2.4, "grad_norm": 7.942927837371826, "learning_rate": 2.0414812034543994e-07, "loss": 0.3137, "step": 16567 }, { "epoch": 2.4, "grad_norm": 7.637531280517578, "learning_rate": 2.0405229158060498e-07, "loss": 0.3028, "step": 16568 }, { "epoch": 2.4, "grad_norm": 7.946002006530762, "learning_rate": 2.0395648275694655e-07, "loss": 0.3121, "step": 16569 }, { "epoch": 2.4, "grad_norm": 8.523849487304688, "learning_rate": 2.0386069387686534e-07, "loss": 0.3306, "step": 16570 }, { "epoch": 2.4, "grad_norm": 8.653939247131348, "learning_rate": 2.037649249427603e-07, "loss": 0.343, "step": 16571 }, { "epoch": 2.4, "grad_norm": 9.191261291503906, "learning_rate": 2.0366917595703192e-07, "loss": 0.4, "step": 16572 }, { "epoch": 2.4, "grad_norm": 8.939699172973633, "learning_rate": 2.0357344692207779e-07, "loss": 0.3977, "step": 16573 }, { "epoch": 2.4, "grad_norm": 7.740142345428467, "learning_rate": 2.0347773784029744e-07, "loss": 0.332, "step": 16574 }, { "epoch": 2.4, "grad_norm": 9.890899658203125, "learning_rate": 2.0338204871408771e-07, "loss": 0.3988, "step": 16575 }, { "epoch": 2.41, "grad_norm": 8.615920066833496, "learning_rate": 2.032863795458466e-07, "loss": 0.3737, "step": 16576 }, { "epoch": 2.41, "grad_norm": 7.940731048583984, "learning_rate": 2.0319073033797097e-07, "loss": 0.3373, "step": 16577 }, { "epoch": 2.41, "grad_norm": 8.247377395629883, "learning_rate": 2.0309510109285633e-07, "loss": 0.3568, "step": 16578 }, { "epoch": 2.41, "grad_norm": 8.969161033630371, "learning_rate": 2.0299949181289965e-07, "loss": 0.3641, "step": 16579 }, { "epoch": 2.41, "grad_norm": 9.136537551879883, "learning_rate": 2.0290390250049527e-07, "loss": 0.3679, "step": 16580 }, { "epoch": 2.41, "grad_norm": 8.790328025817871, "learning_rate": 2.0280833315803892e-07, "loss": 0.4414, "step": 16581 }, { "epoch": 2.41, "grad_norm": 8.086395263671875, "learning_rate": 2.0271278378792434e-07, "loss": 0.2939, "step": 16582 }, { "epoch": 2.41, "grad_norm": 8.14338207244873, "learning_rate": 2.026172543925454e-07, "loss": 0.3004, "step": 16583 }, { "epoch": 2.41, "grad_norm": 9.989559173583984, "learning_rate": 2.0252174497429553e-07, "loss": 0.3874, "step": 16584 }, { "epoch": 2.41, "grad_norm": 9.1452054977417, "learning_rate": 2.024262555355676e-07, "loss": 0.3425, "step": 16585 }, { "epoch": 2.41, "grad_norm": 8.862954139709473, "learning_rate": 2.0233078607875387e-07, "loss": 0.3321, "step": 16586 }, { "epoch": 2.41, "grad_norm": 7.955735206604004, "learning_rate": 2.0223533660624624e-07, "loss": 0.3368, "step": 16587 }, { "epoch": 2.41, "grad_norm": 8.451560974121094, "learning_rate": 2.0213990712043606e-07, "loss": 0.3793, "step": 16588 }, { "epoch": 2.41, "grad_norm": 8.542985916137695, "learning_rate": 2.0204449762371412e-07, "loss": 0.3193, "step": 16589 }, { "epoch": 2.41, "grad_norm": 9.34382438659668, "learning_rate": 2.0194910811847076e-07, "loss": 0.4115, "step": 16590 }, { "epoch": 2.41, "grad_norm": 9.740787506103516, "learning_rate": 2.018537386070961e-07, "loss": 0.4383, "step": 16591 }, { "epoch": 2.41, "grad_norm": 8.948534965515137, "learning_rate": 2.0175838909197852e-07, "loss": 0.3987, "step": 16592 }, { "epoch": 2.41, "grad_norm": 8.78354263305664, "learning_rate": 2.0166305957550812e-07, "loss": 0.3874, "step": 16593 }, { "epoch": 2.41, "grad_norm": 9.065536499023438, "learning_rate": 2.01567750060072e-07, "loss": 0.3897, "step": 16594 }, { "epoch": 2.41, "grad_norm": 9.03976058959961, "learning_rate": 2.014724605480591e-07, "loss": 0.3607, "step": 16595 }, { "epoch": 2.41, "grad_norm": 9.405375480651855, "learning_rate": 2.013771910418558e-07, "loss": 0.423, "step": 16596 }, { "epoch": 2.41, "grad_norm": 8.494386672973633, "learning_rate": 2.012819415438498e-07, "loss": 0.3192, "step": 16597 }, { "epoch": 2.41, "grad_norm": 9.425334930419922, "learning_rate": 2.011867120564268e-07, "loss": 0.3492, "step": 16598 }, { "epoch": 2.41, "grad_norm": 9.566065788269043, "learning_rate": 2.010915025819727e-07, "loss": 0.329, "step": 16599 }, { "epoch": 2.41, "grad_norm": 7.403884410858154, "learning_rate": 2.009963131228729e-07, "loss": 0.3413, "step": 16600 }, { "epoch": 2.41, "grad_norm": 8.096529960632324, "learning_rate": 2.0090114368151233e-07, "loss": 0.4014, "step": 16601 }, { "epoch": 2.41, "grad_norm": 8.64130973815918, "learning_rate": 2.008059942602751e-07, "loss": 0.3656, "step": 16602 }, { "epoch": 2.41, "grad_norm": 9.161455154418945, "learning_rate": 2.007108648615452e-07, "loss": 0.3804, "step": 16603 }, { "epoch": 2.41, "grad_norm": 8.397287368774414, "learning_rate": 2.0061575548770594e-07, "loss": 0.3695, "step": 16604 }, { "epoch": 2.41, "grad_norm": 8.692996978759766, "learning_rate": 2.0052066614113994e-07, "loss": 0.3546, "step": 16605 }, { "epoch": 2.41, "grad_norm": 7.880003929138184, "learning_rate": 2.0042559682422967e-07, "loss": 0.383, "step": 16606 }, { "epoch": 2.41, "grad_norm": 7.3406243324279785, "learning_rate": 2.0033054753935696e-07, "loss": 0.2545, "step": 16607 }, { "epoch": 2.41, "grad_norm": 8.146056175231934, "learning_rate": 2.0023551828890295e-07, "loss": 0.3065, "step": 16608 }, { "epoch": 2.41, "grad_norm": 7.831085681915283, "learning_rate": 2.001405090752486e-07, "loss": 0.3113, "step": 16609 }, { "epoch": 2.41, "grad_norm": 8.094979286193848, "learning_rate": 2.000455199007741e-07, "loss": 0.3378, "step": 16610 }, { "epoch": 2.41, "grad_norm": 8.22202205657959, "learning_rate": 1.9995055076785937e-07, "loss": 0.3702, "step": 16611 }, { "epoch": 2.41, "grad_norm": 8.284481048583984, "learning_rate": 1.9985560167888393e-07, "loss": 0.3653, "step": 16612 }, { "epoch": 2.41, "grad_norm": 8.717427253723145, "learning_rate": 1.9976067263622576e-07, "loss": 0.3428, "step": 16613 }, { "epoch": 2.41, "grad_norm": 7.608120441436768, "learning_rate": 1.9966576364226418e-07, "loss": 0.3165, "step": 16614 }, { "epoch": 2.41, "grad_norm": 8.347251892089844, "learning_rate": 1.9957087469937605e-07, "loss": 0.3273, "step": 16615 }, { "epoch": 2.41, "grad_norm": 8.053285598754883, "learning_rate": 1.9947600580993952e-07, "loss": 0.3622, "step": 16616 }, { "epoch": 2.41, "grad_norm": 8.332865715026855, "learning_rate": 1.9938115697633074e-07, "loss": 0.3426, "step": 16617 }, { "epoch": 2.41, "grad_norm": 9.407832145690918, "learning_rate": 1.9928632820092617e-07, "loss": 0.3564, "step": 16618 }, { "epoch": 2.41, "grad_norm": 9.218663215637207, "learning_rate": 1.9919151948610157e-07, "loss": 0.3981, "step": 16619 }, { "epoch": 2.41, "grad_norm": 8.077876091003418, "learning_rate": 1.990967308342324e-07, "loss": 0.3255, "step": 16620 }, { "epoch": 2.41, "grad_norm": 8.342826843261719, "learning_rate": 1.9900196224769318e-07, "loss": 0.3702, "step": 16621 }, { "epoch": 2.41, "grad_norm": 7.83865213394165, "learning_rate": 1.9890721372885833e-07, "loss": 0.3475, "step": 16622 }, { "epoch": 2.41, "grad_norm": 9.51611614227295, "learning_rate": 1.9881248528010163e-07, "loss": 0.4276, "step": 16623 }, { "epoch": 2.41, "grad_norm": 7.4483184814453125, "learning_rate": 1.9871777690379644e-07, "loss": 0.2982, "step": 16624 }, { "epoch": 2.41, "grad_norm": 7.685748100280762, "learning_rate": 1.9862308860231525e-07, "loss": 0.3427, "step": 16625 }, { "epoch": 2.41, "grad_norm": 7.744250297546387, "learning_rate": 1.9852842037803076e-07, "loss": 0.3242, "step": 16626 }, { "epoch": 2.41, "grad_norm": 8.501152992248535, "learning_rate": 1.984337722333139e-07, "loss": 0.377, "step": 16627 }, { "epoch": 2.41, "grad_norm": 8.489811897277832, "learning_rate": 1.983391441705371e-07, "loss": 0.3316, "step": 16628 }, { "epoch": 2.41, "grad_norm": 9.198952674865723, "learning_rate": 1.9824453619206983e-07, "loss": 0.3335, "step": 16629 }, { "epoch": 2.41, "grad_norm": 9.04159164428711, "learning_rate": 1.981499483002833e-07, "loss": 0.3654, "step": 16630 }, { "epoch": 2.41, "grad_norm": 8.987374305725098, "learning_rate": 1.98055380497547e-07, "loss": 0.3403, "step": 16631 }, { "epoch": 2.41, "grad_norm": 8.564448356628418, "learning_rate": 1.979608327862301e-07, "loss": 0.3584, "step": 16632 }, { "epoch": 2.41, "grad_norm": 8.80807113647461, "learning_rate": 1.9786630516870151e-07, "loss": 0.3824, "step": 16633 }, { "epoch": 2.41, "grad_norm": 8.699044227600098, "learning_rate": 1.9777179764732887e-07, "loss": 0.3147, "step": 16634 }, { "epoch": 2.41, "grad_norm": 8.716407775878906, "learning_rate": 1.9767731022448098e-07, "loss": 0.3296, "step": 16635 }, { "epoch": 2.41, "grad_norm": 8.248581886291504, "learning_rate": 1.975828429025238e-07, "loss": 0.3433, "step": 16636 }, { "epoch": 2.41, "grad_norm": 8.782567977905273, "learning_rate": 1.9748839568382524e-07, "loss": 0.3637, "step": 16637 }, { "epoch": 2.41, "grad_norm": 8.351154327392578, "learning_rate": 1.9739396857075075e-07, "loss": 0.3665, "step": 16638 }, { "epoch": 2.41, "grad_norm": 8.17222785949707, "learning_rate": 1.9729956156566617e-07, "loss": 0.3383, "step": 16639 }, { "epoch": 2.41, "grad_norm": 8.323193550109863, "learning_rate": 1.9720517467093701e-07, "loss": 0.3864, "step": 16640 }, { "epoch": 2.41, "grad_norm": 8.256717681884766, "learning_rate": 1.9711080788892763e-07, "loss": 0.3774, "step": 16641 }, { "epoch": 2.41, "grad_norm": 8.531347274780273, "learning_rate": 1.9701646122200244e-07, "loss": 0.3694, "step": 16642 }, { "epoch": 2.41, "grad_norm": 8.256361961364746, "learning_rate": 1.9692213467252516e-07, "loss": 0.3223, "step": 16643 }, { "epoch": 2.41, "grad_norm": 8.444357872009277, "learning_rate": 1.9682782824285894e-07, "loss": 0.3361, "step": 16644 }, { "epoch": 2.42, "grad_norm": 9.260490417480469, "learning_rate": 1.967335419353664e-07, "loss": 0.4376, "step": 16645 }, { "epoch": 2.42, "grad_norm": 9.236734390258789, "learning_rate": 1.966392757524098e-07, "loss": 0.3512, "step": 16646 }, { "epoch": 2.42, "grad_norm": 9.481534004211426, "learning_rate": 1.9654502969635112e-07, "loss": 0.3527, "step": 16647 }, { "epoch": 2.42, "grad_norm": 9.674263000488281, "learning_rate": 1.964508037695507e-07, "loss": 0.3883, "step": 16648 }, { "epoch": 2.42, "grad_norm": 9.655712127685547, "learning_rate": 1.9635659797437033e-07, "loss": 0.297, "step": 16649 }, { "epoch": 2.42, "grad_norm": 8.667600631713867, "learning_rate": 1.962624123131691e-07, "loss": 0.3489, "step": 16650 }, { "epoch": 2.42, "grad_norm": 9.490107536315918, "learning_rate": 1.9616824678830758e-07, "loss": 0.3354, "step": 16651 }, { "epoch": 2.42, "grad_norm": 9.164826393127441, "learning_rate": 1.960741014021442e-07, "loss": 0.3075, "step": 16652 }, { "epoch": 2.42, "grad_norm": 10.762967109680176, "learning_rate": 1.9597997615703832e-07, "loss": 0.3786, "step": 16653 }, { "epoch": 2.42, "grad_norm": 9.837695121765137, "learning_rate": 1.958858710553476e-07, "loss": 0.3927, "step": 16654 }, { "epoch": 2.42, "grad_norm": 9.883488655090332, "learning_rate": 1.9579178609942947e-07, "loss": 0.4192, "step": 16655 }, { "epoch": 2.42, "grad_norm": 9.545011520385742, "learning_rate": 1.9569772129164208e-07, "loss": 0.417, "step": 16656 }, { "epoch": 2.42, "grad_norm": 8.238725662231445, "learning_rate": 1.9560367663434086e-07, "loss": 0.3481, "step": 16657 }, { "epoch": 2.42, "grad_norm": 7.791134834289551, "learning_rate": 1.9550965212988302e-07, "loss": 0.3602, "step": 16658 }, { "epoch": 2.42, "grad_norm": 7.436133861541748, "learning_rate": 1.9541564778062347e-07, "loss": 0.3237, "step": 16659 }, { "epoch": 2.42, "grad_norm": 8.602953910827637, "learning_rate": 1.9532166358891755e-07, "loss": 0.3159, "step": 16660 }, { "epoch": 2.42, "grad_norm": 8.11224365234375, "learning_rate": 1.9522769955711983e-07, "loss": 0.3284, "step": 16661 }, { "epoch": 2.42, "grad_norm": 7.424782752990723, "learning_rate": 1.951337556875845e-07, "loss": 0.3484, "step": 16662 }, { "epoch": 2.42, "grad_norm": 8.129697799682617, "learning_rate": 1.9503983198266516e-07, "loss": 0.2652, "step": 16663 }, { "epoch": 2.42, "grad_norm": 7.715884208679199, "learning_rate": 1.949459284447149e-07, "loss": 0.32, "step": 16664 }, { "epoch": 2.42, "grad_norm": 8.00283432006836, "learning_rate": 1.9485204507608633e-07, "loss": 0.3903, "step": 16665 }, { "epoch": 2.42, "grad_norm": 9.17358684539795, "learning_rate": 1.9475818187913152e-07, "loss": 0.3766, "step": 16666 }, { "epoch": 2.42, "grad_norm": 8.25679874420166, "learning_rate": 1.9466433885620214e-07, "loss": 0.3465, "step": 16667 }, { "epoch": 2.42, "grad_norm": 7.845265865325928, "learning_rate": 1.9457051600964947e-07, "loss": 0.3465, "step": 16668 }, { "epoch": 2.42, "grad_norm": 7.446512699127197, "learning_rate": 1.9447671334182326e-07, "loss": 0.3076, "step": 16669 }, { "epoch": 2.42, "grad_norm": 7.173501014709473, "learning_rate": 1.9438293085507474e-07, "loss": 0.3525, "step": 16670 }, { "epoch": 2.42, "grad_norm": 9.009173393249512, "learning_rate": 1.9428916855175226e-07, "loss": 0.3725, "step": 16671 }, { "epoch": 2.42, "grad_norm": 8.859167098999023, "learning_rate": 1.9419542643420617e-07, "loss": 0.3669, "step": 16672 }, { "epoch": 2.42, "grad_norm": 7.377241134643555, "learning_rate": 1.9410170450478403e-07, "loss": 0.3138, "step": 16673 }, { "epoch": 2.42, "grad_norm": 7.735968589782715, "learning_rate": 1.9400800276583428e-07, "loss": 0.3131, "step": 16674 }, { "epoch": 2.42, "grad_norm": 10.212067604064941, "learning_rate": 1.939143212197044e-07, "loss": 0.3919, "step": 16675 }, { "epoch": 2.42, "grad_norm": 9.159514427185059, "learning_rate": 1.938206598687415e-07, "loss": 0.413, "step": 16676 }, { "epoch": 2.42, "grad_norm": 9.007533073425293, "learning_rate": 1.9372701871529207e-07, "loss": 0.3759, "step": 16677 }, { "epoch": 2.42, "grad_norm": 8.565925598144531, "learning_rate": 1.9363339776170206e-07, "loss": 0.3404, "step": 16678 }, { "epoch": 2.42, "grad_norm": 8.314911842346191, "learning_rate": 1.9353979701031708e-07, "loss": 0.32, "step": 16679 }, { "epoch": 2.42, "grad_norm": 8.73574447631836, "learning_rate": 1.9344621646348215e-07, "loss": 0.3769, "step": 16680 }, { "epoch": 2.42, "grad_norm": 9.434381484985352, "learning_rate": 1.933526561235418e-07, "loss": 0.3805, "step": 16681 }, { "epoch": 2.42, "grad_norm": 9.149015426635742, "learning_rate": 1.9325911599283984e-07, "loss": 0.3735, "step": 16682 }, { "epoch": 2.42, "grad_norm": 8.535039901733398, "learning_rate": 1.9316559607372007e-07, "loss": 0.327, "step": 16683 }, { "epoch": 2.42, "grad_norm": 8.49827766418457, "learning_rate": 1.930720963685253e-07, "loss": 0.3525, "step": 16684 }, { "epoch": 2.42, "grad_norm": 8.752522468566895, "learning_rate": 1.9297861687959793e-07, "loss": 0.3503, "step": 16685 }, { "epoch": 2.42, "grad_norm": 8.47122859954834, "learning_rate": 1.9288515760928015e-07, "loss": 0.3586, "step": 16686 }, { "epoch": 2.42, "grad_norm": 7.723323822021484, "learning_rate": 1.9279171855991327e-07, "loss": 0.3649, "step": 16687 }, { "epoch": 2.42, "grad_norm": 8.209881782531738, "learning_rate": 1.926982997338382e-07, "loss": 0.3011, "step": 16688 }, { "epoch": 2.42, "grad_norm": 8.669563293457031, "learning_rate": 1.926049011333959e-07, "loss": 0.3898, "step": 16689 }, { "epoch": 2.42, "grad_norm": 7.839846134185791, "learning_rate": 1.9251152276092541e-07, "loss": 0.3483, "step": 16690 }, { "epoch": 2.42, "grad_norm": 8.098065376281738, "learning_rate": 1.9241816461876713e-07, "loss": 0.3252, "step": 16691 }, { "epoch": 2.42, "grad_norm": 7.456903457641602, "learning_rate": 1.923248267092591e-07, "loss": 0.3086, "step": 16692 }, { "epoch": 2.42, "grad_norm": 7.409869194030762, "learning_rate": 1.9223150903474073e-07, "loss": 0.3243, "step": 16693 }, { "epoch": 2.42, "grad_norm": 9.61705207824707, "learning_rate": 1.9213821159754918e-07, "loss": 0.4174, "step": 16694 }, { "epoch": 2.42, "grad_norm": 8.155699729919434, "learning_rate": 1.92044934400022e-07, "loss": 0.3664, "step": 16695 }, { "epoch": 2.42, "grad_norm": 8.597496032714844, "learning_rate": 1.9195167744449625e-07, "loss": 0.3137, "step": 16696 }, { "epoch": 2.42, "grad_norm": 8.100730895996094, "learning_rate": 1.9185844073330827e-07, "loss": 0.3277, "step": 16697 }, { "epoch": 2.42, "grad_norm": 9.067426681518555, "learning_rate": 1.9176522426879393e-07, "loss": 0.3193, "step": 16698 }, { "epoch": 2.42, "grad_norm": 8.171228408813477, "learning_rate": 1.9167202805328874e-07, "loss": 0.3668, "step": 16699 }, { "epoch": 2.42, "grad_norm": 9.3991060256958, "learning_rate": 1.915788520891274e-07, "loss": 0.3774, "step": 16700 }, { "epoch": 2.42, "grad_norm": 9.26989459991455, "learning_rate": 1.9148569637864443e-07, "loss": 0.3888, "step": 16701 }, { "epoch": 2.42, "grad_norm": 7.699696063995361, "learning_rate": 1.9139256092417366e-07, "loss": 0.3644, "step": 16702 }, { "epoch": 2.42, "grad_norm": 7.950167179107666, "learning_rate": 1.912994457280487e-07, "loss": 0.3198, "step": 16703 }, { "epoch": 2.42, "grad_norm": 8.466930389404297, "learning_rate": 1.912063507926015e-07, "loss": 0.3492, "step": 16704 }, { "epoch": 2.42, "grad_norm": 7.354866027832031, "learning_rate": 1.911132761201656e-07, "loss": 0.338, "step": 16705 }, { "epoch": 2.42, "grad_norm": 9.026239395141602, "learning_rate": 1.9102022171307176e-07, "loss": 0.4004, "step": 16706 }, { "epoch": 2.42, "grad_norm": 8.26794719696045, "learning_rate": 1.9092718757365213e-07, "loss": 0.3737, "step": 16707 }, { "epoch": 2.42, "grad_norm": 8.18643569946289, "learning_rate": 1.9083417370423737e-07, "loss": 0.354, "step": 16708 }, { "epoch": 2.42, "grad_norm": 8.761853218078613, "learning_rate": 1.9074118010715723e-07, "loss": 0.3262, "step": 16709 }, { "epoch": 2.42, "grad_norm": 8.209969520568848, "learning_rate": 1.9064820678474226e-07, "loss": 0.3735, "step": 16710 }, { "epoch": 2.42, "grad_norm": 8.526103973388672, "learning_rate": 1.9055525373932103e-07, "loss": 0.3837, "step": 16711 }, { "epoch": 2.42, "grad_norm": 9.26633358001709, "learning_rate": 1.9046232097322323e-07, "loss": 0.3956, "step": 16712 }, { "epoch": 2.42, "grad_norm": 7.379636287689209, "learning_rate": 1.9036940848877602e-07, "loss": 0.3117, "step": 16713 }, { "epoch": 2.43, "grad_norm": 8.167448043823242, "learning_rate": 1.9027651628830833e-07, "loss": 0.3555, "step": 16714 }, { "epoch": 2.43, "grad_norm": 8.37159252166748, "learning_rate": 1.9018364437414659e-07, "loss": 0.3704, "step": 16715 }, { "epoch": 2.43, "grad_norm": 7.170681953430176, "learning_rate": 1.9009079274861782e-07, "loss": 0.317, "step": 16716 }, { "epoch": 2.43, "grad_norm": 8.669994354248047, "learning_rate": 1.8999796141404834e-07, "loss": 0.3464, "step": 16717 }, { "epoch": 2.43, "grad_norm": 8.62604808807373, "learning_rate": 1.8990515037276366e-07, "loss": 0.3567, "step": 16718 }, { "epoch": 2.43, "grad_norm": 8.678741455078125, "learning_rate": 1.898123596270893e-07, "loss": 0.3602, "step": 16719 }, { "epoch": 2.43, "grad_norm": 8.642441749572754, "learning_rate": 1.8971958917934983e-07, "loss": 0.3709, "step": 16720 }, { "epoch": 2.43, "grad_norm": 8.34488296508789, "learning_rate": 1.896268390318695e-07, "loss": 0.3655, "step": 16721 }, { "epoch": 2.43, "grad_norm": 9.212162971496582, "learning_rate": 1.89534109186972e-07, "loss": 0.3199, "step": 16722 }, { "epoch": 2.43, "grad_norm": 8.735093116760254, "learning_rate": 1.8944139964698047e-07, "loss": 0.3163, "step": 16723 }, { "epoch": 2.43, "grad_norm": 6.88692045211792, "learning_rate": 1.8934871041421795e-07, "loss": 0.27, "step": 16724 }, { "epoch": 2.43, "grad_norm": 7.457326889038086, "learning_rate": 1.8925604149100584e-07, "loss": 0.2918, "step": 16725 }, { "epoch": 2.43, "grad_norm": 8.424765586853027, "learning_rate": 1.8916339287966675e-07, "loss": 0.3991, "step": 16726 }, { "epoch": 2.43, "grad_norm": 11.281974792480469, "learning_rate": 1.8907076458252092e-07, "loss": 0.3979, "step": 16727 }, { "epoch": 2.43, "grad_norm": 8.340546607971191, "learning_rate": 1.8897815660189e-07, "loss": 0.3344, "step": 16728 }, { "epoch": 2.43, "grad_norm": 9.93720531463623, "learning_rate": 1.888855689400932e-07, "loss": 0.3902, "step": 16729 }, { "epoch": 2.43, "grad_norm": 8.526101112365723, "learning_rate": 1.8879300159945066e-07, "loss": 0.3495, "step": 16730 }, { "epoch": 2.43, "grad_norm": 8.405752182006836, "learning_rate": 1.8870045458228133e-07, "loss": 0.3547, "step": 16731 }, { "epoch": 2.43, "grad_norm": 8.600584030151367, "learning_rate": 1.8860792789090395e-07, "loss": 0.3458, "step": 16732 }, { "epoch": 2.43, "grad_norm": 9.427349090576172, "learning_rate": 1.8851542152763655e-07, "loss": 0.3647, "step": 16733 }, { "epoch": 2.43, "grad_norm": 7.78668212890625, "learning_rate": 1.884229354947965e-07, "loss": 0.3389, "step": 16734 }, { "epoch": 2.43, "grad_norm": 8.089808464050293, "learning_rate": 1.8833046979470158e-07, "loss": 0.3296, "step": 16735 }, { "epoch": 2.43, "grad_norm": 9.244189262390137, "learning_rate": 1.8823802442966773e-07, "loss": 0.3617, "step": 16736 }, { "epoch": 2.43, "grad_norm": 7.784654140472412, "learning_rate": 1.8814559940201112e-07, "loss": 0.326, "step": 16737 }, { "epoch": 2.43, "grad_norm": 8.691422462463379, "learning_rate": 1.8805319471404746e-07, "loss": 0.393, "step": 16738 }, { "epoch": 2.43, "grad_norm": 8.232892036437988, "learning_rate": 1.8796081036809164e-07, "loss": 0.3352, "step": 16739 }, { "epoch": 2.43, "grad_norm": 9.882671356201172, "learning_rate": 1.8786844636645838e-07, "loss": 0.4248, "step": 16740 }, { "epoch": 2.43, "grad_norm": 9.065475463867188, "learning_rate": 1.877761027114616e-07, "loss": 0.3801, "step": 16741 }, { "epoch": 2.43, "grad_norm": 8.781085968017578, "learning_rate": 1.876837794054148e-07, "loss": 0.3065, "step": 16742 }, { "epoch": 2.43, "grad_norm": 8.437765121459961, "learning_rate": 1.8759147645063112e-07, "loss": 0.3648, "step": 16743 }, { "epoch": 2.43, "grad_norm": 9.706040382385254, "learning_rate": 1.8749919384942292e-07, "loss": 0.3832, "step": 16744 }, { "epoch": 2.43, "grad_norm": 9.688887596130371, "learning_rate": 1.874069316041025e-07, "loss": 0.3745, "step": 16745 }, { "epoch": 2.43, "grad_norm": 9.210541725158691, "learning_rate": 1.873146897169806e-07, "loss": 0.3803, "step": 16746 }, { "epoch": 2.43, "grad_norm": 7.8191819190979, "learning_rate": 1.8722246819036913e-07, "loss": 0.3344, "step": 16747 }, { "epoch": 2.43, "grad_norm": 8.013238906860352, "learning_rate": 1.8713026702657764e-07, "loss": 0.3202, "step": 16748 }, { "epoch": 2.43, "grad_norm": 9.014205932617188, "learning_rate": 1.8703808622791696e-07, "loss": 0.3981, "step": 16749 }, { "epoch": 2.43, "grad_norm": 9.154133796691895, "learning_rate": 1.8694592579669598e-07, "loss": 0.3777, "step": 16750 }, { "epoch": 2.43, "grad_norm": 9.28354263305664, "learning_rate": 1.868537857352237e-07, "loss": 0.3732, "step": 16751 }, { "epoch": 2.43, "grad_norm": 8.412118911743164, "learning_rate": 1.8676166604580857e-07, "loss": 0.3301, "step": 16752 }, { "epoch": 2.43, "grad_norm": 9.458791732788086, "learning_rate": 1.8666956673075852e-07, "loss": 0.4191, "step": 16753 }, { "epoch": 2.43, "grad_norm": 8.416316032409668, "learning_rate": 1.865774877923809e-07, "loss": 0.3031, "step": 16754 }, { "epoch": 2.43, "grad_norm": 8.545156478881836, "learning_rate": 1.8648542923298272e-07, "loss": 0.3551, "step": 16755 }, { "epoch": 2.43, "grad_norm": 9.300186157226562, "learning_rate": 1.8639339105487027e-07, "loss": 0.3951, "step": 16756 }, { "epoch": 2.43, "grad_norm": 7.2692131996154785, "learning_rate": 1.8630137326034944e-07, "loss": 0.3102, "step": 16757 }, { "epoch": 2.43, "grad_norm": 8.248513221740723, "learning_rate": 1.862093758517257e-07, "loss": 0.3393, "step": 16758 }, { "epoch": 2.43, "grad_norm": 8.552505493164062, "learning_rate": 1.861173988313036e-07, "loss": 0.3712, "step": 16759 }, { "epoch": 2.43, "grad_norm": 8.241833686828613, "learning_rate": 1.860254422013877e-07, "loss": 0.3343, "step": 16760 }, { "epoch": 2.43, "grad_norm": 8.227002143859863, "learning_rate": 1.8593350596428182e-07, "loss": 0.3052, "step": 16761 }, { "epoch": 2.43, "grad_norm": 7.350864887237549, "learning_rate": 1.8584159012228916e-07, "loss": 0.3116, "step": 16762 }, { "epoch": 2.43, "grad_norm": 7.713221073150635, "learning_rate": 1.8574969467771274e-07, "loss": 0.3264, "step": 16763 }, { "epoch": 2.43, "grad_norm": 8.460415840148926, "learning_rate": 1.8565781963285475e-07, "loss": 0.336, "step": 16764 }, { "epoch": 2.43, "grad_norm": 11.199604034423828, "learning_rate": 1.8556596499001654e-07, "loss": 0.3412, "step": 16765 }, { "epoch": 2.43, "grad_norm": 7.235615253448486, "learning_rate": 1.8547413075150032e-07, "loss": 0.3257, "step": 16766 }, { "epoch": 2.43, "grad_norm": 8.09929084777832, "learning_rate": 1.8538231691960572e-07, "loss": 0.3662, "step": 16767 }, { "epoch": 2.43, "grad_norm": 8.722390174865723, "learning_rate": 1.8529052349663398e-07, "loss": 0.3651, "step": 16768 }, { "epoch": 2.43, "grad_norm": 8.712095260620117, "learning_rate": 1.85198750484884e-07, "loss": 0.3566, "step": 16769 }, { "epoch": 2.43, "grad_norm": 9.056683540344238, "learning_rate": 1.8510699788665586e-07, "loss": 0.4064, "step": 16770 }, { "epoch": 2.43, "grad_norm": 7.97509765625, "learning_rate": 1.8501526570424763e-07, "loss": 0.3281, "step": 16771 }, { "epoch": 2.43, "grad_norm": 7.849120616912842, "learning_rate": 1.8492355393995774e-07, "loss": 0.3631, "step": 16772 }, { "epoch": 2.43, "grad_norm": 7.5644073486328125, "learning_rate": 1.8483186259608374e-07, "loss": 0.3454, "step": 16773 }, { "epoch": 2.43, "grad_norm": 9.179161071777344, "learning_rate": 1.8474019167492295e-07, "loss": 0.4197, "step": 16774 }, { "epoch": 2.43, "grad_norm": 7.2127861976623535, "learning_rate": 1.8464854117877205e-07, "loss": 0.331, "step": 16775 }, { "epoch": 2.43, "grad_norm": 9.208351135253906, "learning_rate": 1.845569111099271e-07, "loss": 0.333, "step": 16776 }, { "epoch": 2.43, "grad_norm": 9.035239219665527, "learning_rate": 1.8446530147068373e-07, "loss": 0.3897, "step": 16777 }, { "epoch": 2.43, "grad_norm": 10.492286682128906, "learning_rate": 1.843737122633372e-07, "loss": 0.4038, "step": 16778 }, { "epoch": 2.43, "grad_norm": 7.915157794952393, "learning_rate": 1.8428214349018212e-07, "loss": 0.3698, "step": 16779 }, { "epoch": 2.43, "grad_norm": 8.364523887634277, "learning_rate": 1.8419059515351265e-07, "loss": 0.33, "step": 16780 }, { "epoch": 2.43, "grad_norm": 7.792840480804443, "learning_rate": 1.8409906725562186e-07, "loss": 0.3408, "step": 16781 }, { "epoch": 2.43, "grad_norm": 9.194172859191895, "learning_rate": 1.8400755979880366e-07, "loss": 0.4015, "step": 16782 }, { "epoch": 2.44, "grad_norm": 9.39375114440918, "learning_rate": 1.8391607278534982e-07, "loss": 0.4235, "step": 16783 }, { "epoch": 2.44, "grad_norm": 7.864284992218018, "learning_rate": 1.8382460621755313e-07, "loss": 0.3491, "step": 16784 }, { "epoch": 2.44, "grad_norm": 9.64098072052002, "learning_rate": 1.837331600977047e-07, "loss": 0.3809, "step": 16785 }, { "epoch": 2.44, "grad_norm": 9.667956352233887, "learning_rate": 1.8364173442809528e-07, "loss": 0.3381, "step": 16786 }, { "epoch": 2.44, "grad_norm": 7.967982292175293, "learning_rate": 1.8355032921101632e-07, "loss": 0.3614, "step": 16787 }, { "epoch": 2.44, "grad_norm": 8.556632041931152, "learning_rate": 1.8345894444875664e-07, "loss": 0.3034, "step": 16788 }, { "epoch": 2.44, "grad_norm": 9.438982963562012, "learning_rate": 1.833675801436071e-07, "loss": 0.3169, "step": 16789 }, { "epoch": 2.44, "grad_norm": 8.692110061645508, "learning_rate": 1.832762362978555e-07, "loss": 0.3465, "step": 16790 }, { "epoch": 2.44, "grad_norm": 8.266374588012695, "learning_rate": 1.831849129137909e-07, "loss": 0.3484, "step": 16791 }, { "epoch": 2.44, "grad_norm": 9.086352348327637, "learning_rate": 1.8309360999370104e-07, "loss": 0.3582, "step": 16792 }, { "epoch": 2.44, "grad_norm": 8.30762004852295, "learning_rate": 1.8300232753987342e-07, "loss": 0.3198, "step": 16793 }, { "epoch": 2.44, "grad_norm": 9.140399932861328, "learning_rate": 1.8291106555459502e-07, "loss": 0.3912, "step": 16794 }, { "epoch": 2.44, "grad_norm": 8.958887100219727, "learning_rate": 1.828198240401523e-07, "loss": 0.3729, "step": 16795 }, { "epoch": 2.44, "grad_norm": 8.083625793457031, "learning_rate": 1.8272860299883108e-07, "loss": 0.3755, "step": 16796 }, { "epoch": 2.44, "grad_norm": 9.30631160736084, "learning_rate": 1.8263740243291682e-07, "loss": 0.3808, "step": 16797 }, { "epoch": 2.44, "grad_norm": 8.92190933227539, "learning_rate": 1.825462223446943e-07, "loss": 0.3715, "step": 16798 }, { "epoch": 2.44, "grad_norm": 9.652265548706055, "learning_rate": 1.8245506273644828e-07, "loss": 0.3467, "step": 16799 }, { "epoch": 2.44, "grad_norm": 7.9610676765441895, "learning_rate": 1.8236392361046172e-07, "loss": 0.3322, "step": 16800 }, { "epoch": 2.44, "grad_norm": 8.280068397521973, "learning_rate": 1.822728049690191e-07, "loss": 0.3521, "step": 16801 }, { "epoch": 2.44, "grad_norm": 7.942237377166748, "learning_rate": 1.8218170681440214e-07, "loss": 0.3629, "step": 16802 }, { "epoch": 2.44, "grad_norm": 8.447318077087402, "learning_rate": 1.8209062914889417e-07, "loss": 0.3225, "step": 16803 }, { "epoch": 2.44, "grad_norm": 8.14879035949707, "learning_rate": 1.8199957197477612e-07, "loss": 0.3615, "step": 16804 }, { "epoch": 2.44, "grad_norm": 8.228218078613281, "learning_rate": 1.8190853529433016e-07, "loss": 0.3684, "step": 16805 }, { "epoch": 2.44, "grad_norm": 7.795161247253418, "learning_rate": 1.8181751910983621e-07, "loss": 0.3356, "step": 16806 }, { "epoch": 2.44, "grad_norm": 8.519684791564941, "learning_rate": 1.817265234235751e-07, "loss": 0.311, "step": 16807 }, { "epoch": 2.44, "grad_norm": 8.249068260192871, "learning_rate": 1.8163554823782623e-07, "loss": 0.3425, "step": 16808 }, { "epoch": 2.44, "grad_norm": 9.857253074645996, "learning_rate": 1.8154459355486906e-07, "loss": 0.3516, "step": 16809 }, { "epoch": 2.44, "grad_norm": 9.477477073669434, "learning_rate": 1.8145365937698232e-07, "loss": 0.3492, "step": 16810 }, { "epoch": 2.44, "grad_norm": 8.630702018737793, "learning_rate": 1.8136274570644404e-07, "loss": 0.3622, "step": 16811 }, { "epoch": 2.44, "grad_norm": 8.54217529296875, "learning_rate": 1.8127185254553213e-07, "loss": 0.3521, "step": 16812 }, { "epoch": 2.44, "grad_norm": 9.961804389953613, "learning_rate": 1.811809798965236e-07, "loss": 0.3685, "step": 16813 }, { "epoch": 2.44, "grad_norm": 7.50648832321167, "learning_rate": 1.8109012776169507e-07, "loss": 0.3329, "step": 16814 }, { "epoch": 2.44, "grad_norm": 9.0849609375, "learning_rate": 1.8099929614332298e-07, "loss": 0.3436, "step": 16815 }, { "epoch": 2.44, "grad_norm": 9.016990661621094, "learning_rate": 1.8090848504368271e-07, "loss": 0.3417, "step": 16816 }, { "epoch": 2.44, "grad_norm": 7.438923358917236, "learning_rate": 1.8081769446504946e-07, "loss": 0.3558, "step": 16817 }, { "epoch": 2.44, "grad_norm": 8.779622077941895, "learning_rate": 1.8072692440969784e-07, "loss": 0.3659, "step": 16818 }, { "epoch": 2.44, "grad_norm": 8.09286880493164, "learning_rate": 1.806361748799019e-07, "loss": 0.3406, "step": 16819 }, { "epoch": 2.44, "grad_norm": 8.165639877319336, "learning_rate": 1.8054544587793563e-07, "loss": 0.3668, "step": 16820 }, { "epoch": 2.44, "grad_norm": 8.225138664245605, "learning_rate": 1.804547374060712e-07, "loss": 0.3253, "step": 16821 }, { "epoch": 2.44, "grad_norm": 7.6994733810424805, "learning_rate": 1.8036404946658223e-07, "loss": 0.3309, "step": 16822 }, { "epoch": 2.44, "grad_norm": 8.519797325134277, "learning_rate": 1.802733820617396e-07, "loss": 0.377, "step": 16823 }, { "epoch": 2.44, "grad_norm": 9.332784652709961, "learning_rate": 1.801827351938161e-07, "loss": 0.3449, "step": 16824 }, { "epoch": 2.44, "grad_norm": 10.042460441589355, "learning_rate": 1.8009210886508152e-07, "loss": 0.4126, "step": 16825 }, { "epoch": 2.44, "grad_norm": 7.753299236297607, "learning_rate": 1.800015030778076e-07, "loss": 0.3257, "step": 16826 }, { "epoch": 2.44, "grad_norm": 8.455437660217285, "learning_rate": 1.7991091783426326e-07, "loss": 0.3784, "step": 16827 }, { "epoch": 2.44, "grad_norm": 8.432819366455078, "learning_rate": 1.7982035313671839e-07, "loss": 0.3722, "step": 16828 }, { "epoch": 2.44, "grad_norm": 9.597668647766113, "learning_rate": 1.7972980898744195e-07, "loss": 0.3794, "step": 16829 }, { "epoch": 2.44, "grad_norm": 7.3296589851379395, "learning_rate": 1.7963928538870232e-07, "loss": 0.3386, "step": 16830 }, { "epoch": 2.44, "grad_norm": 7.928513526916504, "learning_rate": 1.795487823427675e-07, "loss": 0.2951, "step": 16831 }, { "epoch": 2.44, "grad_norm": 8.036867141723633, "learning_rate": 1.7945829985190486e-07, "loss": 0.3678, "step": 16832 }, { "epoch": 2.44, "grad_norm": 7.6033148765563965, "learning_rate": 1.7936783791838117e-07, "loss": 0.2946, "step": 16833 }, { "epoch": 2.44, "grad_norm": 9.294517517089844, "learning_rate": 1.7927739654446304e-07, "loss": 0.4294, "step": 16834 }, { "epoch": 2.44, "grad_norm": 8.600322723388672, "learning_rate": 1.7918697573241615e-07, "loss": 0.3784, "step": 16835 }, { "epoch": 2.44, "grad_norm": 9.997432708740234, "learning_rate": 1.7909657548450607e-07, "loss": 0.366, "step": 16836 }, { "epoch": 2.44, "grad_norm": 8.152520179748535, "learning_rate": 1.7900619580299703e-07, "loss": 0.3168, "step": 16837 }, { "epoch": 2.44, "grad_norm": 8.534735679626465, "learning_rate": 1.7891583669015408e-07, "loss": 0.3031, "step": 16838 }, { "epoch": 2.44, "grad_norm": 8.317534446716309, "learning_rate": 1.7882549814824066e-07, "loss": 0.3169, "step": 16839 }, { "epoch": 2.44, "grad_norm": 8.821355819702148, "learning_rate": 1.7873518017952017e-07, "loss": 0.3083, "step": 16840 }, { "epoch": 2.44, "grad_norm": 9.052373886108398, "learning_rate": 1.7864488278625555e-07, "loss": 0.3393, "step": 16841 }, { "epoch": 2.44, "grad_norm": 9.885637283325195, "learning_rate": 1.7855460597070825e-07, "loss": 0.346, "step": 16842 }, { "epoch": 2.44, "grad_norm": 8.308158874511719, "learning_rate": 1.7846434973514124e-07, "loss": 0.293, "step": 16843 }, { "epoch": 2.44, "grad_norm": 8.652284622192383, "learning_rate": 1.7837411408181446e-07, "loss": 0.3921, "step": 16844 }, { "epoch": 2.44, "grad_norm": 9.208667755126953, "learning_rate": 1.782838990129898e-07, "loss": 0.3977, "step": 16845 }, { "epoch": 2.44, "grad_norm": 8.928372383117676, "learning_rate": 1.7819370453092663e-07, "loss": 0.3566, "step": 16846 }, { "epoch": 2.44, "grad_norm": 7.57986307144165, "learning_rate": 1.7810353063788498e-07, "loss": 0.3101, "step": 16847 }, { "epoch": 2.44, "grad_norm": 7.767282962799072, "learning_rate": 1.7801337733612386e-07, "loss": 0.3682, "step": 16848 }, { "epoch": 2.44, "grad_norm": 8.694317817687988, "learning_rate": 1.7792324462790199e-07, "loss": 0.3539, "step": 16849 }, { "epoch": 2.44, "grad_norm": 8.358687400817871, "learning_rate": 1.778331325154775e-07, "loss": 0.3789, "step": 16850 }, { "epoch": 2.45, "grad_norm": 8.650394439697266, "learning_rate": 1.7774304100110794e-07, "loss": 0.3695, "step": 16851 }, { "epoch": 2.45, "grad_norm": 8.815667152404785, "learning_rate": 1.7765297008705048e-07, "loss": 0.4174, "step": 16852 }, { "epoch": 2.45, "grad_norm": 9.239129066467285, "learning_rate": 1.775629197755617e-07, "loss": 0.3712, "step": 16853 }, { "epoch": 2.45, "grad_norm": 8.860859870910645, "learning_rate": 1.7747289006889775e-07, "loss": 0.3546, "step": 16854 }, { "epoch": 2.45, "grad_norm": 9.173589706420898, "learning_rate": 1.773828809693142e-07, "loss": 0.3665, "step": 16855 }, { "epoch": 2.45, "grad_norm": 8.678974151611328, "learning_rate": 1.7729289247906543e-07, "loss": 0.3845, "step": 16856 }, { "epoch": 2.45, "grad_norm": 7.62113618850708, "learning_rate": 1.772029246004071e-07, "loss": 0.3573, "step": 16857 }, { "epoch": 2.45, "grad_norm": 7.421831130981445, "learning_rate": 1.7711297733559205e-07, "loss": 0.3298, "step": 16858 }, { "epoch": 2.45, "grad_norm": 7.945576190948486, "learning_rate": 1.7702305068687472e-07, "loss": 0.3425, "step": 16859 }, { "epoch": 2.45, "grad_norm": 9.654955863952637, "learning_rate": 1.769331446565072e-07, "loss": 0.3767, "step": 16860 }, { "epoch": 2.45, "grad_norm": 7.922113418579102, "learning_rate": 1.7684325924674292e-07, "loss": 0.3604, "step": 16861 }, { "epoch": 2.45, "grad_norm": 10.77694320678711, "learning_rate": 1.7675339445983306e-07, "loss": 0.3663, "step": 16862 }, { "epoch": 2.45, "grad_norm": 7.586583137512207, "learning_rate": 1.7666355029802927e-07, "loss": 0.3262, "step": 16863 }, { "epoch": 2.45, "grad_norm": 7.688415050506592, "learning_rate": 1.765737267635824e-07, "loss": 0.3218, "step": 16864 }, { "epoch": 2.45, "grad_norm": 7.908691883087158, "learning_rate": 1.7648392385874256e-07, "loss": 0.3438, "step": 16865 }, { "epoch": 2.45, "grad_norm": 8.842421531677246, "learning_rate": 1.763941415857606e-07, "loss": 0.304, "step": 16866 }, { "epoch": 2.45, "grad_norm": 8.85330581665039, "learning_rate": 1.7630437994688486e-07, "loss": 0.3443, "step": 16867 }, { "epoch": 2.45, "grad_norm": 7.884109973907471, "learning_rate": 1.762146389443645e-07, "loss": 0.3416, "step": 16868 }, { "epoch": 2.45, "grad_norm": 8.914682388305664, "learning_rate": 1.7612491858044787e-07, "loss": 0.3386, "step": 16869 }, { "epoch": 2.45, "grad_norm": 9.617191314697266, "learning_rate": 1.7603521885738281e-07, "loss": 0.3829, "step": 16870 }, { "epoch": 2.45, "grad_norm": 8.063114166259766, "learning_rate": 1.7594553977741645e-07, "loss": 0.2826, "step": 16871 }, { "epoch": 2.45, "grad_norm": 10.83471393585205, "learning_rate": 1.7585588134279572e-07, "loss": 0.3997, "step": 16872 }, { "epoch": 2.45, "grad_norm": 8.226753234863281, "learning_rate": 1.7576624355576674e-07, "loss": 0.3128, "step": 16873 }, { "epoch": 2.45, "grad_norm": 7.573198318481445, "learning_rate": 1.756766264185753e-07, "loss": 0.3248, "step": 16874 }, { "epoch": 2.45, "grad_norm": 9.247591018676758, "learning_rate": 1.7558702993346651e-07, "loss": 0.3542, "step": 16875 }, { "epoch": 2.45, "grad_norm": 9.83503246307373, "learning_rate": 1.7549745410268557e-07, "loss": 0.4724, "step": 16876 }, { "epoch": 2.45, "grad_norm": 9.308256149291992, "learning_rate": 1.7540789892847562e-07, "loss": 0.3849, "step": 16877 }, { "epoch": 2.45, "grad_norm": 8.602892875671387, "learning_rate": 1.7531836441308146e-07, "loss": 0.346, "step": 16878 }, { "epoch": 2.45, "grad_norm": 11.010574340820312, "learning_rate": 1.7522885055874526e-07, "loss": 0.3947, "step": 16879 }, { "epoch": 2.45, "grad_norm": 8.480884552001953, "learning_rate": 1.7513935736771057e-07, "loss": 0.3989, "step": 16880 }, { "epoch": 2.45, "grad_norm": 8.800278663635254, "learning_rate": 1.750498848422185e-07, "loss": 0.3898, "step": 16881 }, { "epoch": 2.45, "grad_norm": 9.576764106750488, "learning_rate": 1.749604329845118e-07, "loss": 0.3835, "step": 16882 }, { "epoch": 2.45, "grad_norm": 8.417020797729492, "learning_rate": 1.7487100179683045e-07, "loss": 0.3541, "step": 16883 }, { "epoch": 2.45, "grad_norm": 7.156625270843506, "learning_rate": 1.7478159128141556e-07, "loss": 0.309, "step": 16884 }, { "epoch": 2.45, "grad_norm": 8.814102172851562, "learning_rate": 1.746922014405071e-07, "loss": 0.3674, "step": 16885 }, { "epoch": 2.45, "grad_norm": 9.790718078613281, "learning_rate": 1.7460283227634454e-07, "loss": 0.3014, "step": 16886 }, { "epoch": 2.45, "grad_norm": 9.5770845413208, "learning_rate": 1.745134837911668e-07, "loss": 0.3474, "step": 16887 }, { "epoch": 2.45, "grad_norm": 8.573142051696777, "learning_rate": 1.7442415598721249e-07, "loss": 0.3693, "step": 16888 }, { "epoch": 2.45, "grad_norm": 8.910889625549316, "learning_rate": 1.7433484886671956e-07, "loss": 0.3233, "step": 16889 }, { "epoch": 2.45, "grad_norm": 8.80321216583252, "learning_rate": 1.7424556243192535e-07, "loss": 0.2929, "step": 16890 }, { "epoch": 2.45, "grad_norm": 8.355158805847168, "learning_rate": 1.7415629668506694e-07, "loss": 0.3729, "step": 16891 }, { "epoch": 2.45, "grad_norm": 8.040461540222168, "learning_rate": 1.7406705162838065e-07, "loss": 0.3571, "step": 16892 }, { "epoch": 2.45, "grad_norm": 8.51304817199707, "learning_rate": 1.7397782726410236e-07, "loss": 0.3687, "step": 16893 }, { "epoch": 2.45, "grad_norm": 8.343355178833008, "learning_rate": 1.7388862359446753e-07, "loss": 0.3721, "step": 16894 }, { "epoch": 2.45, "grad_norm": 7.92132043838501, "learning_rate": 1.7379944062171082e-07, "loss": 0.3698, "step": 16895 }, { "epoch": 2.45, "grad_norm": 8.72795295715332, "learning_rate": 1.7371027834806684e-07, "loss": 0.3347, "step": 16896 }, { "epoch": 2.45, "grad_norm": 7.238096714019775, "learning_rate": 1.736211367757694e-07, "loss": 0.3562, "step": 16897 }, { "epoch": 2.45, "grad_norm": 7.451086044311523, "learning_rate": 1.7353201590705113e-07, "loss": 0.3383, "step": 16898 }, { "epoch": 2.45, "grad_norm": 9.070732116699219, "learning_rate": 1.7344291574414593e-07, "loss": 0.4111, "step": 16899 }, { "epoch": 2.45, "grad_norm": 7.675278186798096, "learning_rate": 1.733538362892849e-07, "loss": 0.3124, "step": 16900 }, { "epoch": 2.45, "grad_norm": 10.652705192565918, "learning_rate": 1.7326477754470092e-07, "loss": 0.3752, "step": 16901 }, { "epoch": 2.45, "grad_norm": 9.354192733764648, "learning_rate": 1.731757395126243e-07, "loss": 0.3881, "step": 16902 }, { "epoch": 2.45, "grad_norm": 8.525593757629395, "learning_rate": 1.7308672219528608e-07, "loss": 0.3303, "step": 16903 }, { "epoch": 2.45, "grad_norm": 9.995977401733398, "learning_rate": 1.7299772559491643e-07, "loss": 0.3747, "step": 16904 }, { "epoch": 2.45, "grad_norm": 8.762923240661621, "learning_rate": 1.7290874971374513e-07, "loss": 0.3537, "step": 16905 }, { "epoch": 2.45, "grad_norm": 8.474910736083984, "learning_rate": 1.7281979455400119e-07, "loss": 0.341, "step": 16906 }, { "epoch": 2.45, "grad_norm": 8.646346092224121, "learning_rate": 1.7273086011791316e-07, "loss": 0.3089, "step": 16907 }, { "epoch": 2.45, "grad_norm": 8.74111557006836, "learning_rate": 1.7264194640770934e-07, "loss": 0.4174, "step": 16908 }, { "epoch": 2.45, "grad_norm": 8.803411483764648, "learning_rate": 1.7255305342561733e-07, "loss": 0.3575, "step": 16909 }, { "epoch": 2.45, "grad_norm": 9.09772777557373, "learning_rate": 1.7246418117386396e-07, "loss": 0.4256, "step": 16910 }, { "epoch": 2.45, "grad_norm": 9.864351272583008, "learning_rate": 1.7237532965467627e-07, "loss": 0.3859, "step": 16911 }, { "epoch": 2.45, "grad_norm": 9.253914833068848, "learning_rate": 1.7228649887027936e-07, "loss": 0.4178, "step": 16912 }, { "epoch": 2.45, "grad_norm": 8.089149475097656, "learning_rate": 1.7219768882289986e-07, "loss": 0.3689, "step": 16913 }, { "epoch": 2.45, "grad_norm": 8.916790008544922, "learning_rate": 1.721088995147617e-07, "loss": 0.4087, "step": 16914 }, { "epoch": 2.45, "grad_norm": 8.082039833068848, "learning_rate": 1.7202013094809043e-07, "loss": 0.3226, "step": 16915 }, { "epoch": 2.45, "grad_norm": 9.522329330444336, "learning_rate": 1.71931383125109e-07, "loss": 0.41, "step": 16916 }, { "epoch": 2.45, "grad_norm": 7.707902908325195, "learning_rate": 1.718426560480415e-07, "loss": 0.3559, "step": 16917 }, { "epoch": 2.45, "grad_norm": 9.524567604064941, "learning_rate": 1.7175394971911085e-07, "loss": 0.3762, "step": 16918 }, { "epoch": 2.45, "grad_norm": 8.040033340454102, "learning_rate": 1.7166526414053873e-07, "loss": 0.3663, "step": 16919 }, { "epoch": 2.46, "grad_norm": 7.307950496673584, "learning_rate": 1.7157659931454804e-07, "loss": 0.2944, "step": 16920 }, { "epoch": 2.46, "grad_norm": 9.265403747558594, "learning_rate": 1.7148795524335913e-07, "loss": 0.3674, "step": 16921 }, { "epoch": 2.46, "grad_norm": 8.86628246307373, "learning_rate": 1.7139933192919387e-07, "loss": 0.3662, "step": 16922 }, { "epoch": 2.46, "grad_norm": 8.599109649658203, "learning_rate": 1.713107293742716e-07, "loss": 0.4121, "step": 16923 }, { "epoch": 2.46, "grad_norm": 8.039215087890625, "learning_rate": 1.7122214758081266e-07, "loss": 0.3333, "step": 16924 }, { "epoch": 2.46, "grad_norm": 8.929237365722656, "learning_rate": 1.7113358655103615e-07, "loss": 0.396, "step": 16925 }, { "epoch": 2.46, "grad_norm": 9.223299026489258, "learning_rate": 1.710450462871609e-07, "loss": 0.3712, "step": 16926 }, { "epoch": 2.46, "grad_norm": 9.304146766662598, "learning_rate": 1.7095652679140505e-07, "loss": 0.3739, "step": 16927 }, { "epoch": 2.46, "grad_norm": 7.388282775878906, "learning_rate": 1.7086802806598632e-07, "loss": 0.3272, "step": 16928 }, { "epoch": 2.46, "grad_norm": 8.699419975280762, "learning_rate": 1.7077955011312205e-07, "loss": 0.3221, "step": 16929 }, { "epoch": 2.46, "grad_norm": 9.224924087524414, "learning_rate": 1.706910929350288e-07, "loss": 0.3529, "step": 16930 }, { "epoch": 2.46, "grad_norm": 9.743400573730469, "learning_rate": 1.706026565339227e-07, "loss": 0.4102, "step": 16931 }, { "epoch": 2.46, "grad_norm": 9.65588665008545, "learning_rate": 1.705142409120196e-07, "loss": 0.3577, "step": 16932 }, { "epoch": 2.46, "grad_norm": 8.6524658203125, "learning_rate": 1.70425846071534e-07, "loss": 0.3212, "step": 16933 }, { "epoch": 2.46, "grad_norm": 8.397747039794922, "learning_rate": 1.703374720146814e-07, "loss": 0.3811, "step": 16934 }, { "epoch": 2.46, "grad_norm": 8.807828903198242, "learning_rate": 1.7024911874367487e-07, "loss": 0.3546, "step": 16935 }, { "epoch": 2.46, "grad_norm": 8.961029052734375, "learning_rate": 1.7016078626072895e-07, "loss": 0.4117, "step": 16936 }, { "epoch": 2.46, "grad_norm": 8.105789184570312, "learning_rate": 1.7007247456805596e-07, "loss": 0.3349, "step": 16937 }, { "epoch": 2.46, "grad_norm": 8.361639022827148, "learning_rate": 1.6998418366786872e-07, "loss": 0.3295, "step": 16938 }, { "epoch": 2.46, "grad_norm": 9.54041862487793, "learning_rate": 1.6989591356237898e-07, "loss": 0.3497, "step": 16939 }, { "epoch": 2.46, "grad_norm": 7.12138032913208, "learning_rate": 1.6980766425379834e-07, "loss": 0.3103, "step": 16940 }, { "epoch": 2.46, "grad_norm": 9.6002779006958, "learning_rate": 1.6971943574433788e-07, "loss": 0.4194, "step": 16941 }, { "epoch": 2.46, "grad_norm": 7.2091217041015625, "learning_rate": 1.6963122803620756e-07, "loss": 0.3034, "step": 16942 }, { "epoch": 2.46, "grad_norm": 9.1961088180542, "learning_rate": 1.695430411316181e-07, "loss": 0.3949, "step": 16943 }, { "epoch": 2.46, "grad_norm": 8.595593452453613, "learning_rate": 1.6945487503277822e-07, "loss": 0.3965, "step": 16944 }, { "epoch": 2.46, "grad_norm": 7.598631858825684, "learning_rate": 1.6936672974189682e-07, "loss": 0.344, "step": 16945 }, { "epoch": 2.46, "grad_norm": 8.261110305786133, "learning_rate": 1.6927860526118244e-07, "loss": 0.3484, "step": 16946 }, { "epoch": 2.46, "grad_norm": 7.85697603225708, "learning_rate": 1.6919050159284287e-07, "loss": 0.3207, "step": 16947 }, { "epoch": 2.46, "grad_norm": 9.110098838806152, "learning_rate": 1.6910241873908538e-07, "loss": 0.3121, "step": 16948 }, { "epoch": 2.46, "grad_norm": 9.179662704467773, "learning_rate": 1.6901435670211662e-07, "loss": 0.3453, "step": 16949 }, { "epoch": 2.46, "grad_norm": 9.527738571166992, "learning_rate": 1.6892631548414293e-07, "loss": 0.3282, "step": 16950 }, { "epoch": 2.46, "grad_norm": 8.64741039276123, "learning_rate": 1.6883829508737013e-07, "loss": 0.3424, "step": 16951 }, { "epoch": 2.46, "grad_norm": 7.833268642425537, "learning_rate": 1.6875029551400332e-07, "loss": 0.306, "step": 16952 }, { "epoch": 2.46, "grad_norm": 8.419435501098633, "learning_rate": 1.6866231676624754e-07, "loss": 0.3464, "step": 16953 }, { "epoch": 2.46, "grad_norm": 9.481379508972168, "learning_rate": 1.685743588463061e-07, "loss": 0.4517, "step": 16954 }, { "epoch": 2.46, "grad_norm": 7.670567035675049, "learning_rate": 1.6848642175638362e-07, "loss": 0.3353, "step": 16955 }, { "epoch": 2.46, "grad_norm": 8.682414054870605, "learning_rate": 1.6839850549868229e-07, "loss": 0.2876, "step": 16956 }, { "epoch": 2.46, "grad_norm": 8.533666610717773, "learning_rate": 1.683106100754057e-07, "loss": 0.3356, "step": 16957 }, { "epoch": 2.46, "grad_norm": 9.007760047912598, "learning_rate": 1.6822273548875522e-07, "loss": 0.3882, "step": 16958 }, { "epoch": 2.46, "grad_norm": 9.417679786682129, "learning_rate": 1.681348817409325e-07, "loss": 0.3683, "step": 16959 }, { "epoch": 2.46, "grad_norm": 7.620168685913086, "learning_rate": 1.680470488341388e-07, "loss": 0.3532, "step": 16960 }, { "epoch": 2.46, "grad_norm": 8.267293930053711, "learning_rate": 1.6795923677057444e-07, "loss": 0.3485, "step": 16961 }, { "epoch": 2.46, "grad_norm": 9.559431076049805, "learning_rate": 1.6787144555243948e-07, "loss": 0.3815, "step": 16962 }, { "epoch": 2.46, "grad_norm": 8.69253921508789, "learning_rate": 1.6778367518193336e-07, "loss": 0.3723, "step": 16963 }, { "epoch": 2.46, "grad_norm": 9.216024398803711, "learning_rate": 1.6769592566125513e-07, "loss": 0.367, "step": 16964 }, { "epoch": 2.46, "grad_norm": 9.90637493133545, "learning_rate": 1.676081969926031e-07, "loss": 0.3681, "step": 16965 }, { "epoch": 2.46, "grad_norm": 8.522773742675781, "learning_rate": 1.675204891781752e-07, "loss": 0.3678, "step": 16966 }, { "epoch": 2.46, "grad_norm": 10.016719818115234, "learning_rate": 1.6743280222016897e-07, "loss": 0.3846, "step": 16967 }, { "epoch": 2.46, "grad_norm": 8.725173950195312, "learning_rate": 1.6734513612078072e-07, "loss": 0.3629, "step": 16968 }, { "epoch": 2.46, "grad_norm": 8.551273345947266, "learning_rate": 1.6725749088220732e-07, "loss": 0.3408, "step": 16969 }, { "epoch": 2.46, "grad_norm": 8.193838119506836, "learning_rate": 1.6716986650664444e-07, "loss": 0.2898, "step": 16970 }, { "epoch": 2.46, "grad_norm": 8.119917869567871, "learning_rate": 1.6708226299628747e-07, "loss": 0.3542, "step": 16971 }, { "epoch": 2.46, "grad_norm": 8.532708168029785, "learning_rate": 1.6699468035333087e-07, "loss": 0.3172, "step": 16972 }, { "epoch": 2.46, "grad_norm": 7.718219757080078, "learning_rate": 1.669071185799692e-07, "loss": 0.3384, "step": 16973 }, { "epoch": 2.46, "grad_norm": 8.094862937927246, "learning_rate": 1.6681957767839627e-07, "loss": 0.3325, "step": 16974 }, { "epoch": 2.46, "grad_norm": 8.728747367858887, "learning_rate": 1.6673205765080446e-07, "loss": 0.3412, "step": 16975 }, { "epoch": 2.46, "grad_norm": 8.402771949768066, "learning_rate": 1.6664455849938763e-07, "loss": 0.3062, "step": 16976 }, { "epoch": 2.46, "grad_norm": 9.463815689086914, "learning_rate": 1.6655708022633673e-07, "loss": 0.3749, "step": 16977 }, { "epoch": 2.46, "grad_norm": 7.326962471008301, "learning_rate": 1.6646962283384458e-07, "loss": 0.3297, "step": 16978 }, { "epoch": 2.46, "grad_norm": 9.678047180175781, "learning_rate": 1.6638218632410138e-07, "loss": 0.353, "step": 16979 }, { "epoch": 2.46, "grad_norm": 8.457123756408691, "learning_rate": 1.66294770699298e-07, "loss": 0.2951, "step": 16980 }, { "epoch": 2.46, "grad_norm": 7.823321342468262, "learning_rate": 1.6620737596162447e-07, "loss": 0.3447, "step": 16981 }, { "epoch": 2.46, "grad_norm": 8.643957138061523, "learning_rate": 1.6612000211327048e-07, "loss": 0.381, "step": 16982 }, { "epoch": 2.46, "grad_norm": 8.089614868164062, "learning_rate": 1.660326491564249e-07, "loss": 0.4239, "step": 16983 }, { "epoch": 2.46, "grad_norm": 8.385095596313477, "learning_rate": 1.659453170932762e-07, "loss": 0.3741, "step": 16984 }, { "epoch": 2.46, "grad_norm": 8.603963851928711, "learning_rate": 1.658580059260124e-07, "loss": 0.3329, "step": 16985 }, { "epoch": 2.46, "grad_norm": 11.949731826782227, "learning_rate": 1.6577071565682088e-07, "loss": 0.4544, "step": 16986 }, { "epoch": 2.46, "grad_norm": 9.501901626586914, "learning_rate": 1.6568344628788865e-07, "loss": 0.3839, "step": 16987 }, { "epoch": 2.46, "grad_norm": 7.765280246734619, "learning_rate": 1.6559619782140234e-07, "loss": 0.3257, "step": 16988 }, { "epoch": 2.47, "grad_norm": 8.093668937683105, "learning_rate": 1.6550897025954702e-07, "loss": 0.2821, "step": 16989 }, { "epoch": 2.47, "grad_norm": 8.925369262695312, "learning_rate": 1.65421763604509e-07, "loss": 0.3287, "step": 16990 }, { "epoch": 2.47, "grad_norm": 7.885560989379883, "learning_rate": 1.6533457785847215e-07, "loss": 0.3532, "step": 16991 }, { "epoch": 2.47, "grad_norm": 8.684846878051758, "learning_rate": 1.6524741302362178e-07, "loss": 0.394, "step": 16992 }, { "epoch": 2.47, "grad_norm": 8.293476104736328, "learning_rate": 1.6516026910214087e-07, "loss": 0.3642, "step": 16993 }, { "epoch": 2.47, "grad_norm": 8.483831405639648, "learning_rate": 1.650731460962127e-07, "loss": 0.3229, "step": 16994 }, { "epoch": 2.47, "grad_norm": 8.377945899963379, "learning_rate": 1.6498604400802073e-07, "loss": 0.3731, "step": 16995 }, { "epoch": 2.47, "grad_norm": 9.0901517868042, "learning_rate": 1.6489896283974613e-07, "loss": 0.3121, "step": 16996 }, { "epoch": 2.47, "grad_norm": 10.222843170166016, "learning_rate": 1.648119025935717e-07, "loss": 0.3674, "step": 16997 }, { "epoch": 2.47, "grad_norm": 9.564929008483887, "learning_rate": 1.6472486327167755e-07, "loss": 0.4266, "step": 16998 }, { "epoch": 2.47, "grad_norm": 9.186123847961426, "learning_rate": 1.6463784487624522e-07, "loss": 0.375, "step": 16999 }, { "epoch": 2.47, "grad_norm": 8.781126976013184, "learning_rate": 1.6455084740945414e-07, "loss": 0.3467, "step": 17000 }, { "epoch": 2.47, "grad_norm": 7.886415481567383, "learning_rate": 1.64463870873484e-07, "loss": 0.3441, "step": 17001 }, { "epoch": 2.47, "grad_norm": 9.300332069396973, "learning_rate": 1.643769152705141e-07, "loss": 0.3833, "step": 17002 }, { "epoch": 2.47, "grad_norm": 9.548562049865723, "learning_rate": 1.6428998060272282e-07, "loss": 0.4217, "step": 17003 }, { "epoch": 2.47, "grad_norm": 9.56931209564209, "learning_rate": 1.6420306687228814e-07, "loss": 0.3574, "step": 17004 }, { "epoch": 2.47, "grad_norm": 8.277981758117676, "learning_rate": 1.6411617408138767e-07, "loss": 0.3251, "step": 17005 }, { "epoch": 2.47, "grad_norm": 7.8434977531433105, "learning_rate": 1.6402930223219823e-07, "loss": 0.3665, "step": 17006 }, { "epoch": 2.47, "grad_norm": 9.078415870666504, "learning_rate": 1.6394245132689632e-07, "loss": 0.409, "step": 17007 }, { "epoch": 2.47, "grad_norm": 7.577838897705078, "learning_rate": 1.6385562136765784e-07, "loss": 0.3192, "step": 17008 }, { "epoch": 2.47, "grad_norm": 8.84914493560791, "learning_rate": 1.637688123566584e-07, "loss": 0.4099, "step": 17009 }, { "epoch": 2.47, "grad_norm": 8.841796875, "learning_rate": 1.6368202429607214e-07, "loss": 0.3559, "step": 17010 }, { "epoch": 2.47, "grad_norm": 7.692514419555664, "learning_rate": 1.6359525718807444e-07, "loss": 0.3496, "step": 17011 }, { "epoch": 2.47, "grad_norm": 9.379926681518555, "learning_rate": 1.6350851103483808e-07, "loss": 0.388, "step": 17012 }, { "epoch": 2.47, "grad_norm": 8.76045036315918, "learning_rate": 1.6342178583853727e-07, "loss": 0.3554, "step": 17013 }, { "epoch": 2.47, "grad_norm": 10.969873428344727, "learning_rate": 1.6333508160134403e-07, "loss": 0.3632, "step": 17014 }, { "epoch": 2.47, "grad_norm": 9.8948335647583, "learning_rate": 1.6324839832543093e-07, "loss": 0.3662, "step": 17015 }, { "epoch": 2.47, "grad_norm": 9.149600982666016, "learning_rate": 1.6316173601296967e-07, "loss": 0.3641, "step": 17016 }, { "epoch": 2.47, "grad_norm": 8.907865524291992, "learning_rate": 1.6307509466613135e-07, "loss": 0.376, "step": 17017 }, { "epoch": 2.47, "grad_norm": 9.125083923339844, "learning_rate": 1.6298847428708663e-07, "loss": 0.3585, "step": 17018 }, { "epoch": 2.47, "grad_norm": 7.502919673919678, "learning_rate": 1.6290187487800577e-07, "loss": 0.3361, "step": 17019 }, { "epoch": 2.47, "grad_norm": 9.888459205627441, "learning_rate": 1.628152964410582e-07, "loss": 0.3921, "step": 17020 }, { "epoch": 2.47, "grad_norm": 7.578619956970215, "learning_rate": 1.627287389784131e-07, "loss": 0.3307, "step": 17021 }, { "epoch": 2.47, "grad_norm": 8.526634216308594, "learning_rate": 1.6264220249223914e-07, "loss": 0.3223, "step": 17022 }, { "epoch": 2.47, "grad_norm": 7.994714736938477, "learning_rate": 1.625556869847041e-07, "loss": 0.3286, "step": 17023 }, { "epoch": 2.47, "grad_norm": 9.983154296875, "learning_rate": 1.6246919245797574e-07, "loss": 0.4164, "step": 17024 }, { "epoch": 2.47, "grad_norm": 10.027188301086426, "learning_rate": 1.6238271891422083e-07, "loss": 0.4492, "step": 17025 }, { "epoch": 2.47, "grad_norm": 7.646952152252197, "learning_rate": 1.62296266355606e-07, "loss": 0.2684, "step": 17026 }, { "epoch": 2.47, "grad_norm": 8.225375175476074, "learning_rate": 1.6220983478429695e-07, "loss": 0.3283, "step": 17027 }, { "epoch": 2.47, "grad_norm": 9.828963279724121, "learning_rate": 1.6212342420245962e-07, "loss": 0.3875, "step": 17028 }, { "epoch": 2.47, "grad_norm": 10.085983276367188, "learning_rate": 1.6203703461225791e-07, "loss": 0.45, "step": 17029 }, { "epoch": 2.47, "grad_norm": 8.337705612182617, "learning_rate": 1.6195066601585728e-07, "loss": 0.3548, "step": 17030 }, { "epoch": 2.47, "grad_norm": 8.437800407409668, "learning_rate": 1.6186431841542047e-07, "loss": 0.3138, "step": 17031 }, { "epoch": 2.47, "grad_norm": 7.799376487731934, "learning_rate": 1.6177799181311193e-07, "loss": 0.3684, "step": 17032 }, { "epoch": 2.47, "grad_norm": 7.429637908935547, "learning_rate": 1.616916862110933e-07, "loss": 0.3253, "step": 17033 }, { "epoch": 2.47, "grad_norm": 8.428522109985352, "learning_rate": 1.616054016115278e-07, "loss": 0.3597, "step": 17034 }, { "epoch": 2.47, "grad_norm": 8.883556365966797, "learning_rate": 1.6151913801657657e-07, "loss": 0.3794, "step": 17035 }, { "epoch": 2.47, "grad_norm": 8.310541152954102, "learning_rate": 1.614328954284009e-07, "loss": 0.3607, "step": 17036 }, { "epoch": 2.47, "grad_norm": 7.82478666305542, "learning_rate": 1.6134667384916146e-07, "loss": 0.2991, "step": 17037 }, { "epoch": 2.47, "grad_norm": 9.054171562194824, "learning_rate": 1.6126047328101856e-07, "loss": 0.3049, "step": 17038 }, { "epoch": 2.47, "grad_norm": 9.06572151184082, "learning_rate": 1.6117429372613156e-07, "loss": 0.4059, "step": 17039 }, { "epoch": 2.47, "grad_norm": 10.251625061035156, "learning_rate": 1.6108813518665976e-07, "loss": 0.4118, "step": 17040 }, { "epoch": 2.47, "grad_norm": 7.476428031921387, "learning_rate": 1.6100199766476164e-07, "loss": 0.3059, "step": 17041 }, { "epoch": 2.47, "grad_norm": 8.157658576965332, "learning_rate": 1.6091588116259512e-07, "loss": 0.3278, "step": 17042 }, { "epoch": 2.47, "grad_norm": 8.272494316101074, "learning_rate": 1.6082978568231797e-07, "loss": 0.3692, "step": 17043 }, { "epoch": 2.47, "grad_norm": 7.5926079750061035, "learning_rate": 1.607437112260872e-07, "loss": 0.3302, "step": 17044 }, { "epoch": 2.47, "grad_norm": 8.32275390625, "learning_rate": 1.6065765779605867e-07, "loss": 0.3195, "step": 17045 }, { "epoch": 2.47, "grad_norm": 9.057235717773438, "learning_rate": 1.6057162539438895e-07, "loss": 0.3864, "step": 17046 }, { "epoch": 2.47, "grad_norm": 8.71876335144043, "learning_rate": 1.6048561402323324e-07, "loss": 0.3776, "step": 17047 }, { "epoch": 2.47, "grad_norm": 8.726428985595703, "learning_rate": 1.6039962368474645e-07, "loss": 0.3466, "step": 17048 }, { "epoch": 2.47, "grad_norm": 8.003602981567383, "learning_rate": 1.60313654381083e-07, "loss": 0.3481, "step": 17049 }, { "epoch": 2.47, "grad_norm": 9.871124267578125, "learning_rate": 1.6022770611439618e-07, "loss": 0.3804, "step": 17050 }, { "epoch": 2.47, "grad_norm": 8.107653617858887, "learning_rate": 1.601417788868402e-07, "loss": 0.3547, "step": 17051 }, { "epoch": 2.47, "grad_norm": 9.917656898498535, "learning_rate": 1.6005587270056687e-07, "loss": 0.3719, "step": 17052 }, { "epoch": 2.47, "grad_norm": 8.193500518798828, "learning_rate": 1.5996998755772927e-07, "loss": 0.3395, "step": 17053 }, { "epoch": 2.47, "grad_norm": 8.193378448486328, "learning_rate": 1.598841234604783e-07, "loss": 0.3408, "step": 17054 }, { "epoch": 2.47, "grad_norm": 9.257078170776367, "learning_rate": 1.5979828041096598e-07, "loss": 0.4274, "step": 17055 }, { "epoch": 2.47, "grad_norm": 7.04869270324707, "learning_rate": 1.597124584113424e-07, "loss": 0.3385, "step": 17056 }, { "epoch": 2.47, "grad_norm": 8.298382759094238, "learning_rate": 1.5962665746375793e-07, "loss": 0.3423, "step": 17057 }, { "epoch": 2.48, "grad_norm": 8.11408519744873, "learning_rate": 1.5954087757036193e-07, "loss": 0.3291, "step": 17058 }, { "epoch": 2.48, "grad_norm": 11.242508888244629, "learning_rate": 1.5945511873330374e-07, "loss": 0.3887, "step": 17059 }, { "epoch": 2.48, "grad_norm": 8.872029304504395, "learning_rate": 1.5936938095473174e-07, "loss": 0.3269, "step": 17060 }, { "epoch": 2.48, "grad_norm": 9.56468391418457, "learning_rate": 1.5928366423679408e-07, "loss": 0.3291, "step": 17061 }, { "epoch": 2.48, "grad_norm": 8.72352123260498, "learning_rate": 1.5919796858163813e-07, "loss": 0.3485, "step": 17062 }, { "epoch": 2.48, "grad_norm": 7.919682025909424, "learning_rate": 1.5911229399141102e-07, "loss": 0.3421, "step": 17063 }, { "epoch": 2.48, "grad_norm": 9.499176025390625, "learning_rate": 1.5902664046825898e-07, "loss": 0.349, "step": 17064 }, { "epoch": 2.48, "grad_norm": 9.662046432495117, "learning_rate": 1.589410080143283e-07, "loss": 0.3382, "step": 17065 }, { "epoch": 2.48, "grad_norm": 7.7022294998168945, "learning_rate": 1.588553966317636e-07, "loss": 0.2971, "step": 17066 }, { "epoch": 2.48, "grad_norm": 7.60925817489624, "learning_rate": 1.5876980632271076e-07, "loss": 0.3164, "step": 17067 }, { "epoch": 2.48, "grad_norm": 7.819990158081055, "learning_rate": 1.58684237089313e-07, "loss": 0.3402, "step": 17068 }, { "epoch": 2.48, "grad_norm": 8.879426956176758, "learning_rate": 1.5859868893371518e-07, "loss": 0.3817, "step": 17069 }, { "epoch": 2.48, "grad_norm": 8.245139122009277, "learning_rate": 1.5851316185805995e-07, "loss": 0.3519, "step": 17070 }, { "epoch": 2.48, "grad_norm": 8.958826065063477, "learning_rate": 1.5842765586449002e-07, "loss": 0.3565, "step": 17071 }, { "epoch": 2.48, "grad_norm": 8.947233200073242, "learning_rate": 1.583421709551479e-07, "loss": 0.3468, "step": 17072 }, { "epoch": 2.48, "grad_norm": 7.79957914352417, "learning_rate": 1.5825670713217476e-07, "loss": 0.3331, "step": 17073 }, { "epoch": 2.48, "grad_norm": 8.935347557067871, "learning_rate": 1.5817126439771267e-07, "loss": 0.37, "step": 17074 }, { "epoch": 2.48, "grad_norm": 7.69529914855957, "learning_rate": 1.5808584275390158e-07, "loss": 0.2856, "step": 17075 }, { "epoch": 2.48, "grad_norm": 8.79520320892334, "learning_rate": 1.5800044220288154e-07, "loss": 0.3836, "step": 17076 }, { "epoch": 2.48, "grad_norm": 9.088140487670898, "learning_rate": 1.5791506274679246e-07, "loss": 0.34, "step": 17077 }, { "epoch": 2.48, "grad_norm": 9.378071784973145, "learning_rate": 1.578297043877732e-07, "loss": 0.4252, "step": 17078 }, { "epoch": 2.48, "grad_norm": 7.9985809326171875, "learning_rate": 1.5774436712796235e-07, "loss": 0.2906, "step": 17079 }, { "epoch": 2.48, "grad_norm": 9.820968627929688, "learning_rate": 1.5765905096949784e-07, "loss": 0.4552, "step": 17080 }, { "epoch": 2.48, "grad_norm": 8.37104320526123, "learning_rate": 1.5757375591451706e-07, "loss": 0.3342, "step": 17081 }, { "epoch": 2.48, "grad_norm": 8.330347061157227, "learning_rate": 1.5748848196515718e-07, "loss": 0.3449, "step": 17082 }, { "epoch": 2.48, "grad_norm": 9.151028633117676, "learning_rate": 1.574032291235543e-07, "loss": 0.3471, "step": 17083 }, { "epoch": 2.48, "grad_norm": 8.85108470916748, "learning_rate": 1.5731799739184482e-07, "loss": 0.3477, "step": 17084 }, { "epoch": 2.48, "grad_norm": 8.445255279541016, "learning_rate": 1.5723278677216313e-07, "loss": 0.3578, "step": 17085 }, { "epoch": 2.48, "grad_norm": 8.018223762512207, "learning_rate": 1.5714759726664506e-07, "loss": 0.3667, "step": 17086 }, { "epoch": 2.48, "grad_norm": 8.904661178588867, "learning_rate": 1.5706242887742404e-07, "loss": 0.3376, "step": 17087 }, { "epoch": 2.48, "grad_norm": 8.070527076721191, "learning_rate": 1.569772816066347e-07, "loss": 0.2775, "step": 17088 }, { "epoch": 2.48, "grad_norm": 7.746731281280518, "learning_rate": 1.5689215545640934e-07, "loss": 0.3297, "step": 17089 }, { "epoch": 2.48, "grad_norm": 9.454521179199219, "learning_rate": 1.5680705042888153e-07, "loss": 0.3488, "step": 17090 }, { "epoch": 2.48, "grad_norm": 8.925616264343262, "learning_rate": 1.5672196652618285e-07, "loss": 0.3859, "step": 17091 }, { "epoch": 2.48, "grad_norm": 9.481332778930664, "learning_rate": 1.5663690375044514e-07, "loss": 0.3742, "step": 17092 }, { "epoch": 2.48, "grad_norm": 8.144490242004395, "learning_rate": 1.5655186210379935e-07, "loss": 0.3305, "step": 17093 }, { "epoch": 2.48, "grad_norm": 8.308524131774902, "learning_rate": 1.564668415883763e-07, "loss": 0.3406, "step": 17094 }, { "epoch": 2.48, "grad_norm": 8.104004859924316, "learning_rate": 1.563818422063059e-07, "loss": 0.3396, "step": 17095 }, { "epoch": 2.48, "grad_norm": 9.10965347290039, "learning_rate": 1.5629686395971776e-07, "loss": 0.3104, "step": 17096 }, { "epoch": 2.48, "grad_norm": 8.416610717773438, "learning_rate": 1.562119068507408e-07, "loss": 0.3328, "step": 17097 }, { "epoch": 2.48, "grad_norm": 8.310823440551758, "learning_rate": 1.561269708815035e-07, "loss": 0.3011, "step": 17098 }, { "epoch": 2.48, "grad_norm": 10.195096015930176, "learning_rate": 1.5604205605413378e-07, "loss": 0.3787, "step": 17099 }, { "epoch": 2.48, "grad_norm": 9.196975708007812, "learning_rate": 1.5595716237075906e-07, "loss": 0.4287, "step": 17100 }, { "epoch": 2.48, "grad_norm": 8.219460487365723, "learning_rate": 1.558722898335063e-07, "loss": 0.3721, "step": 17101 }, { "epoch": 2.48, "grad_norm": 8.325196266174316, "learning_rate": 1.557874384445016e-07, "loss": 0.3588, "step": 17102 }, { "epoch": 2.48, "grad_norm": 7.655517101287842, "learning_rate": 1.55702608205871e-07, "loss": 0.3162, "step": 17103 }, { "epoch": 2.48, "grad_norm": 8.559124946594238, "learning_rate": 1.5561779911973972e-07, "loss": 0.3391, "step": 17104 }, { "epoch": 2.48, "grad_norm": 8.088467597961426, "learning_rate": 1.555330111882328e-07, "loss": 0.3292, "step": 17105 }, { "epoch": 2.48, "grad_norm": 10.306121826171875, "learning_rate": 1.554482444134736e-07, "loss": 0.3999, "step": 17106 }, { "epoch": 2.48, "grad_norm": 7.645390033721924, "learning_rate": 1.5536349879758692e-07, "loss": 0.3349, "step": 17107 }, { "epoch": 2.48, "grad_norm": 8.728631973266602, "learning_rate": 1.5527877434269486e-07, "loss": 0.3336, "step": 17108 }, { "epoch": 2.48, "grad_norm": 8.886852264404297, "learning_rate": 1.5519407105092108e-07, "loss": 0.3172, "step": 17109 }, { "epoch": 2.48, "grad_norm": 8.498632431030273, "learning_rate": 1.5510938892438674e-07, "loss": 0.3853, "step": 17110 }, { "epoch": 2.48, "grad_norm": 9.133010864257812, "learning_rate": 1.5502472796521426e-07, "loss": 0.3593, "step": 17111 }, { "epoch": 2.48, "grad_norm": 7.917935371398926, "learning_rate": 1.5494008817552416e-07, "loss": 0.349, "step": 17112 }, { "epoch": 2.48, "grad_norm": 8.41472339630127, "learning_rate": 1.5485546955743701e-07, "loss": 0.351, "step": 17113 }, { "epoch": 2.48, "grad_norm": 11.033744812011719, "learning_rate": 1.5477087211307283e-07, "loss": 0.351, "step": 17114 }, { "epoch": 2.48, "grad_norm": 10.464448928833008, "learning_rate": 1.5468629584455106e-07, "loss": 0.4153, "step": 17115 }, { "epoch": 2.48, "grad_norm": 8.890978813171387, "learning_rate": 1.5460174075399067e-07, "loss": 0.3955, "step": 17116 }, { "epoch": 2.48, "grad_norm": 8.547174453735352, "learning_rate": 1.545172068435101e-07, "loss": 0.3038, "step": 17117 }, { "epoch": 2.48, "grad_norm": 8.050095558166504, "learning_rate": 1.5443269411522718e-07, "loss": 0.3245, "step": 17118 }, { "epoch": 2.48, "grad_norm": 7.955942630767822, "learning_rate": 1.5434820257125913e-07, "loss": 0.3824, "step": 17119 }, { "epoch": 2.48, "grad_norm": 7.787697792053223, "learning_rate": 1.5426373221372292e-07, "loss": 0.3192, "step": 17120 }, { "epoch": 2.48, "grad_norm": 8.402071952819824, "learning_rate": 1.5417928304473482e-07, "loss": 0.3231, "step": 17121 }, { "epoch": 2.48, "grad_norm": 9.129587173461914, "learning_rate": 1.5409485506641006e-07, "loss": 0.3879, "step": 17122 }, { "epoch": 2.48, "grad_norm": 8.077184677124023, "learning_rate": 1.5401044828086474e-07, "loss": 0.3514, "step": 17123 }, { "epoch": 2.48, "grad_norm": 9.436017036437988, "learning_rate": 1.5392606269021256e-07, "loss": 0.4032, "step": 17124 }, { "epoch": 2.48, "grad_norm": 7.271381378173828, "learning_rate": 1.5384169829656834e-07, "loss": 0.3122, "step": 17125 }, { "epoch": 2.48, "grad_norm": 8.427919387817383, "learning_rate": 1.537573551020459e-07, "loss": 0.3871, "step": 17126 }, { "epoch": 2.49, "grad_norm": 8.518771171569824, "learning_rate": 1.5367303310875735e-07, "loss": 0.3811, "step": 17127 }, { "epoch": 2.49, "grad_norm": 8.608927726745605, "learning_rate": 1.535887323188163e-07, "loss": 0.3134, "step": 17128 }, { "epoch": 2.49, "grad_norm": 8.126923561096191, "learning_rate": 1.5350445273433378e-07, "loss": 0.309, "step": 17129 }, { "epoch": 2.49, "grad_norm": 8.347501754760742, "learning_rate": 1.5342019435742225e-07, "loss": 0.3451, "step": 17130 }, { "epoch": 2.49, "grad_norm": 8.648801803588867, "learning_rate": 1.5333595719019198e-07, "loss": 0.3212, "step": 17131 }, { "epoch": 2.49, "grad_norm": 8.57767105102539, "learning_rate": 1.5325174123475371e-07, "loss": 0.3156, "step": 17132 }, { "epoch": 2.49, "grad_norm": 7.301400661468506, "learning_rate": 1.5316754649321718e-07, "loss": 0.311, "step": 17133 }, { "epoch": 2.49, "grad_norm": 8.581289291381836, "learning_rate": 1.530833729676918e-07, "loss": 0.3825, "step": 17134 }, { "epoch": 2.49, "grad_norm": 8.590400695800781, "learning_rate": 1.5299922066028646e-07, "loss": 0.3308, "step": 17135 }, { "epoch": 2.49, "grad_norm": 10.506738662719727, "learning_rate": 1.529150895731094e-07, "loss": 0.4154, "step": 17136 }, { "epoch": 2.49, "grad_norm": 8.830820083618164, "learning_rate": 1.528309797082683e-07, "loss": 0.3255, "step": 17137 }, { "epoch": 2.49, "grad_norm": 8.440428733825684, "learning_rate": 1.5274689106787065e-07, "loss": 0.3485, "step": 17138 }, { "epoch": 2.49, "grad_norm": 7.441247463226318, "learning_rate": 1.5266282365402295e-07, "loss": 0.3017, "step": 17139 }, { "epoch": 2.49, "grad_norm": 9.827308654785156, "learning_rate": 1.5257877746883164e-07, "loss": 0.4051, "step": 17140 }, { "epoch": 2.49, "grad_norm": 8.915974617004395, "learning_rate": 1.524947525144017e-07, "loss": 0.3408, "step": 17141 }, { "epoch": 2.49, "grad_norm": 8.81235122680664, "learning_rate": 1.524107487928391e-07, "loss": 0.3249, "step": 17142 }, { "epoch": 2.49, "grad_norm": 7.966267108917236, "learning_rate": 1.5232676630624752e-07, "loss": 0.3734, "step": 17143 }, { "epoch": 2.49, "grad_norm": 8.600494384765625, "learning_rate": 1.5224280505673205e-07, "loss": 0.3727, "step": 17144 }, { "epoch": 2.49, "grad_norm": 8.10032844543457, "learning_rate": 1.5215886504639508e-07, "loss": 0.338, "step": 17145 }, { "epoch": 2.49, "grad_norm": 8.47461986541748, "learning_rate": 1.5207494627734064e-07, "loss": 0.3389, "step": 17146 }, { "epoch": 2.49, "grad_norm": 8.554952621459961, "learning_rate": 1.5199104875167058e-07, "loss": 0.3541, "step": 17147 }, { "epoch": 2.49, "grad_norm": 9.163702011108398, "learning_rate": 1.5190717247148676e-07, "loss": 0.454, "step": 17148 }, { "epoch": 2.49, "grad_norm": 9.460480690002441, "learning_rate": 1.5182331743889076e-07, "loss": 0.4127, "step": 17149 }, { "epoch": 2.49, "grad_norm": 8.10531997680664, "learning_rate": 1.5173948365598343e-07, "loss": 0.3262, "step": 17150 }, { "epoch": 2.49, "grad_norm": 8.916228294372559, "learning_rate": 1.516556711248651e-07, "loss": 0.3926, "step": 17151 }, { "epoch": 2.49, "grad_norm": 8.421627044677734, "learning_rate": 1.515718798476354e-07, "loss": 0.348, "step": 17152 }, { "epoch": 2.49, "grad_norm": 9.0174560546875, "learning_rate": 1.514881098263938e-07, "loss": 0.3874, "step": 17153 }, { "epoch": 2.49, "grad_norm": 9.838616371154785, "learning_rate": 1.5140436106323896e-07, "loss": 0.3592, "step": 17154 }, { "epoch": 2.49, "grad_norm": 9.952308654785156, "learning_rate": 1.513206335602689e-07, "loss": 0.4017, "step": 17155 }, { "epoch": 2.49, "grad_norm": 10.63076400756836, "learning_rate": 1.5123692731958147e-07, "loss": 0.4616, "step": 17156 }, { "epoch": 2.49, "grad_norm": 8.398527145385742, "learning_rate": 1.5115324234327377e-07, "loss": 0.344, "step": 17157 }, { "epoch": 2.49, "grad_norm": 8.441757202148438, "learning_rate": 1.5106957863344228e-07, "loss": 0.3475, "step": 17158 }, { "epoch": 2.49, "grad_norm": 9.13119888305664, "learning_rate": 1.5098593619218313e-07, "loss": 0.3703, "step": 17159 }, { "epoch": 2.49, "grad_norm": 8.597822189331055, "learning_rate": 1.509023150215919e-07, "loss": 0.3171, "step": 17160 }, { "epoch": 2.49, "grad_norm": 9.167890548706055, "learning_rate": 1.5081871512376376e-07, "loss": 0.407, "step": 17161 }, { "epoch": 2.49, "grad_norm": 9.327225685119629, "learning_rate": 1.507351365007924e-07, "loss": 0.3417, "step": 17162 }, { "epoch": 2.49, "grad_norm": 8.338973999023438, "learning_rate": 1.5065157915477288e-07, "loss": 0.3638, "step": 17163 }, { "epoch": 2.49, "grad_norm": 8.349611282348633, "learning_rate": 1.5056804308779735e-07, "loss": 0.3347, "step": 17164 }, { "epoch": 2.49, "grad_norm": 9.322493553161621, "learning_rate": 1.504845283019599e-07, "loss": 0.309, "step": 17165 }, { "epoch": 2.49, "grad_norm": 7.998121738433838, "learning_rate": 1.5040103479935208e-07, "loss": 0.3074, "step": 17166 }, { "epoch": 2.49, "grad_norm": 8.075624465942383, "learning_rate": 1.5031756258206586e-07, "loss": 0.3516, "step": 17167 }, { "epoch": 2.49, "grad_norm": 7.09148645401001, "learning_rate": 1.5023411165219247e-07, "loss": 0.2894, "step": 17168 }, { "epoch": 2.49, "grad_norm": 8.775545120239258, "learning_rate": 1.501506820118227e-07, "loss": 0.3591, "step": 17169 }, { "epoch": 2.49, "grad_norm": 8.141989707946777, "learning_rate": 1.5006727366304672e-07, "loss": 0.299, "step": 17170 }, { "epoch": 2.49, "grad_norm": 8.120166778564453, "learning_rate": 1.4998388660795426e-07, "loss": 0.3257, "step": 17171 }, { "epoch": 2.49, "grad_norm": 7.933899879455566, "learning_rate": 1.4990052084863436e-07, "loss": 0.3361, "step": 17172 }, { "epoch": 2.49, "grad_norm": 8.072769165039062, "learning_rate": 1.498171763871756e-07, "loss": 0.3826, "step": 17173 }, { "epoch": 2.49, "grad_norm": 8.566930770874023, "learning_rate": 1.4973385322566613e-07, "loss": 0.3429, "step": 17174 }, { "epoch": 2.49, "grad_norm": 8.014673233032227, "learning_rate": 1.4965055136619363e-07, "loss": 0.3619, "step": 17175 }, { "epoch": 2.49, "grad_norm": 9.054442405700684, "learning_rate": 1.4956727081084453e-07, "loss": 0.4095, "step": 17176 }, { "epoch": 2.49, "grad_norm": 9.126311302185059, "learning_rate": 1.4948401156170575e-07, "loss": 0.3356, "step": 17177 }, { "epoch": 2.49, "grad_norm": 8.826948165893555, "learning_rate": 1.4940077362086324e-07, "loss": 0.3417, "step": 17178 }, { "epoch": 2.49, "grad_norm": 9.473559379577637, "learning_rate": 1.4931755699040216e-07, "loss": 0.347, "step": 17179 }, { "epoch": 2.49, "grad_norm": 8.422417640686035, "learning_rate": 1.4923436167240756e-07, "loss": 0.3372, "step": 17180 }, { "epoch": 2.49, "grad_norm": 7.390013694763184, "learning_rate": 1.4915118766896362e-07, "loss": 0.3294, "step": 17181 }, { "epoch": 2.49, "grad_norm": 9.854628562927246, "learning_rate": 1.490680349821545e-07, "loss": 0.3756, "step": 17182 }, { "epoch": 2.49, "grad_norm": 9.462972640991211, "learning_rate": 1.4898490361406257e-07, "loss": 0.3634, "step": 17183 }, { "epoch": 2.49, "grad_norm": 8.616847038269043, "learning_rate": 1.4890179356677157e-07, "loss": 0.3404, "step": 17184 }, { "epoch": 2.49, "grad_norm": 9.410768508911133, "learning_rate": 1.4881870484236293e-07, "loss": 0.3114, "step": 17185 }, { "epoch": 2.49, "grad_norm": 9.1182279586792, "learning_rate": 1.4873563744291895e-07, "loss": 0.378, "step": 17186 }, { "epoch": 2.49, "grad_norm": 8.444157600402832, "learning_rate": 1.4865259137052022e-07, "loss": 0.3178, "step": 17187 }, { "epoch": 2.49, "grad_norm": 10.15420150756836, "learning_rate": 1.4856956662724762e-07, "loss": 0.3846, "step": 17188 }, { "epoch": 2.49, "grad_norm": 9.5927095413208, "learning_rate": 1.4848656321518094e-07, "loss": 0.4114, "step": 17189 }, { "epoch": 2.49, "grad_norm": 8.344629287719727, "learning_rate": 1.4840358113639994e-07, "loss": 0.3479, "step": 17190 }, { "epoch": 2.49, "grad_norm": 10.527938842773438, "learning_rate": 1.4832062039298355e-07, "loss": 0.3956, "step": 17191 }, { "epoch": 2.49, "grad_norm": 8.645247459411621, "learning_rate": 1.4823768098701005e-07, "loss": 0.3698, "step": 17192 }, { "epoch": 2.49, "grad_norm": 7.975743293762207, "learning_rate": 1.481547629205576e-07, "loss": 0.3222, "step": 17193 }, { "epoch": 2.49, "grad_norm": 9.53078842163086, "learning_rate": 1.4807186619570344e-07, "loss": 0.3787, "step": 17194 }, { "epoch": 2.49, "grad_norm": 8.39867877960205, "learning_rate": 1.4798899081452444e-07, "loss": 0.3139, "step": 17195 }, { "epoch": 2.5, "grad_norm": 8.560555458068848, "learning_rate": 1.4790613677909714e-07, "loss": 0.3997, "step": 17196 }, { "epoch": 2.5, "grad_norm": 7.99487829208374, "learning_rate": 1.4782330409149658e-07, "loss": 0.3205, "step": 17197 }, { "epoch": 2.5, "grad_norm": 8.583840370178223, "learning_rate": 1.4774049275379895e-07, "loss": 0.3662, "step": 17198 }, { "epoch": 2.5, "grad_norm": 9.64639949798584, "learning_rate": 1.4765770276807808e-07, "loss": 0.3537, "step": 17199 }, { "epoch": 2.5, "grad_norm": 10.311870574951172, "learning_rate": 1.4757493413640899e-07, "loss": 0.3768, "step": 17200 }, { "epoch": 2.5, "grad_norm": 8.520857810974121, "learning_rate": 1.474921868608644e-07, "loss": 0.3711, "step": 17201 }, { "epoch": 2.5, "grad_norm": 8.78956127166748, "learning_rate": 1.4740946094351836e-07, "loss": 0.3208, "step": 17202 }, { "epoch": 2.5, "grad_norm": 8.203001976013184, "learning_rate": 1.4732675638644276e-07, "loss": 0.3473, "step": 17203 }, { "epoch": 2.5, "grad_norm": 7.67118501663208, "learning_rate": 1.4724407319170962e-07, "loss": 0.3169, "step": 17204 }, { "epoch": 2.5, "grad_norm": 9.478401184082031, "learning_rate": 1.4716141136139104e-07, "loss": 0.3685, "step": 17205 }, { "epoch": 2.5, "grad_norm": 8.123319625854492, "learning_rate": 1.4707877089755717e-07, "loss": 0.3506, "step": 17206 }, { "epoch": 2.5, "grad_norm": 8.247220039367676, "learning_rate": 1.4699615180227932e-07, "loss": 0.3814, "step": 17207 }, { "epoch": 2.5, "grad_norm": 9.476229667663574, "learning_rate": 1.469135540776266e-07, "loss": 0.3906, "step": 17208 }, { "epoch": 2.5, "grad_norm": 8.458993911743164, "learning_rate": 1.4683097772566866e-07, "loss": 0.2974, "step": 17209 }, { "epoch": 2.5, "grad_norm": 9.121695518493652, "learning_rate": 1.4674842274847442e-07, "loss": 0.4239, "step": 17210 }, { "epoch": 2.5, "grad_norm": 8.55292797088623, "learning_rate": 1.4666588914811196e-07, "loss": 0.3422, "step": 17211 }, { "epoch": 2.5, "grad_norm": 8.883508682250977, "learning_rate": 1.4658337692664912e-07, "loss": 0.3884, "step": 17212 }, { "epoch": 2.5, "grad_norm": 8.915894508361816, "learning_rate": 1.4650088608615318e-07, "loss": 0.358, "step": 17213 }, { "epoch": 2.5, "grad_norm": 8.381759643554688, "learning_rate": 1.464184166286907e-07, "loss": 0.3203, "step": 17214 }, { "epoch": 2.5, "grad_norm": 9.331364631652832, "learning_rate": 1.4633596855632792e-07, "loss": 0.3502, "step": 17215 }, { "epoch": 2.5, "grad_norm": 9.990453720092773, "learning_rate": 1.4625354187113038e-07, "loss": 0.3591, "step": 17216 }, { "epoch": 2.5, "grad_norm": 9.84515380859375, "learning_rate": 1.4617113657516333e-07, "loss": 0.403, "step": 17217 }, { "epoch": 2.5, "grad_norm": 8.239072799682617, "learning_rate": 1.460887526704907e-07, "loss": 0.3476, "step": 17218 }, { "epoch": 2.5, "grad_norm": 8.565648078918457, "learning_rate": 1.4600639015917737e-07, "loss": 0.3561, "step": 17219 }, { "epoch": 2.5, "grad_norm": 8.684042930603027, "learning_rate": 1.4592404904328592e-07, "loss": 0.3858, "step": 17220 }, { "epoch": 2.5, "grad_norm": 10.017767906188965, "learning_rate": 1.4584172932488013e-07, "loss": 0.4358, "step": 17221 }, { "epoch": 2.5, "grad_norm": 8.631709098815918, "learning_rate": 1.4575943100602172e-07, "loss": 0.3259, "step": 17222 }, { "epoch": 2.5, "grad_norm": 7.675398349761963, "learning_rate": 1.456771540887728e-07, "loss": 0.3284, "step": 17223 }, { "epoch": 2.5, "grad_norm": 8.667375564575195, "learning_rate": 1.4559489857519459e-07, "loss": 0.3777, "step": 17224 }, { "epoch": 2.5, "grad_norm": 7.293550491333008, "learning_rate": 1.45512664467348e-07, "loss": 0.3258, "step": 17225 }, { "epoch": 2.5, "grad_norm": 8.283373832702637, "learning_rate": 1.4543045176729318e-07, "loss": 0.3361, "step": 17226 }, { "epoch": 2.5, "grad_norm": 7.971750736236572, "learning_rate": 1.453482604770898e-07, "loss": 0.3321, "step": 17227 }, { "epoch": 2.5, "grad_norm": 8.249858856201172, "learning_rate": 1.4526609059879714e-07, "loss": 0.357, "step": 17228 }, { "epoch": 2.5, "grad_norm": 9.704598426818848, "learning_rate": 1.4518394213447383e-07, "loss": 0.424, "step": 17229 }, { "epoch": 2.5, "grad_norm": 9.089489936828613, "learning_rate": 1.451018150861778e-07, "loss": 0.4052, "step": 17230 }, { "epoch": 2.5, "grad_norm": 8.111762046813965, "learning_rate": 1.4501970945596688e-07, "loss": 0.3616, "step": 17231 }, { "epoch": 2.5, "grad_norm": 8.404812812805176, "learning_rate": 1.449376252458978e-07, "loss": 0.3495, "step": 17232 }, { "epoch": 2.5, "grad_norm": 8.997725486755371, "learning_rate": 1.4485556245802733e-07, "loss": 0.348, "step": 17233 }, { "epoch": 2.5, "grad_norm": 8.272542953491211, "learning_rate": 1.447735210944112e-07, "loss": 0.4129, "step": 17234 }, { "epoch": 2.5, "grad_norm": 7.900732040405273, "learning_rate": 1.446915011571048e-07, "loss": 0.3591, "step": 17235 }, { "epoch": 2.5, "grad_norm": 8.309502601623535, "learning_rate": 1.4460950264816329e-07, "loss": 0.3545, "step": 17236 }, { "epoch": 2.5, "grad_norm": 9.969940185546875, "learning_rate": 1.4452752556964075e-07, "loss": 0.3783, "step": 17237 }, { "epoch": 2.5, "grad_norm": 8.350976943969727, "learning_rate": 1.4444556992359125e-07, "loss": 0.3203, "step": 17238 }, { "epoch": 2.5, "grad_norm": 7.925102710723877, "learning_rate": 1.4436363571206744e-07, "loss": 0.3269, "step": 17239 }, { "epoch": 2.5, "grad_norm": 8.921141624450684, "learning_rate": 1.4428172293712305e-07, "loss": 0.3811, "step": 17240 }, { "epoch": 2.5, "grad_norm": 9.284019470214844, "learning_rate": 1.4419983160080908e-07, "loss": 0.386, "step": 17241 }, { "epoch": 2.5, "grad_norm": 8.694321632385254, "learning_rate": 1.441179617051783e-07, "loss": 0.3693, "step": 17242 }, { "epoch": 2.5, "grad_norm": 8.623420715332031, "learning_rate": 1.4403611325228116e-07, "loss": 0.2714, "step": 17243 }, { "epoch": 2.5, "grad_norm": 7.759565830230713, "learning_rate": 1.4395428624416828e-07, "loss": 0.302, "step": 17244 }, { "epoch": 2.5, "grad_norm": 7.332972526550293, "learning_rate": 1.4387248068288993e-07, "loss": 0.3348, "step": 17245 }, { "epoch": 2.5, "grad_norm": 8.501371383666992, "learning_rate": 1.4379069657049549e-07, "loss": 0.3302, "step": 17246 }, { "epoch": 2.5, "grad_norm": 9.530899047851562, "learning_rate": 1.4370893390903393e-07, "loss": 0.3503, "step": 17247 }, { "epoch": 2.5, "grad_norm": 8.475297927856445, "learning_rate": 1.436271927005538e-07, "loss": 0.3467, "step": 17248 }, { "epoch": 2.5, "grad_norm": 8.30251407623291, "learning_rate": 1.4354547294710285e-07, "loss": 0.3425, "step": 17249 }, { "epoch": 2.5, "grad_norm": 8.838499069213867, "learning_rate": 1.4346377465072845e-07, "loss": 0.3846, "step": 17250 }, { "epoch": 2.5, "grad_norm": 7.9803972244262695, "learning_rate": 1.4338209781347743e-07, "loss": 0.3301, "step": 17251 }, { "epoch": 2.5, "grad_norm": 9.36567211151123, "learning_rate": 1.4330044243739636e-07, "loss": 0.3795, "step": 17252 }, { "epoch": 2.5, "grad_norm": 8.845783233642578, "learning_rate": 1.4321880852453015e-07, "loss": 0.3371, "step": 17253 }, { "epoch": 2.5, "grad_norm": 8.282994270324707, "learning_rate": 1.431371960769252e-07, "loss": 0.3602, "step": 17254 }, { "epoch": 2.5, "grad_norm": 8.408235549926758, "learning_rate": 1.4305560509662494e-07, "loss": 0.3688, "step": 17255 }, { "epoch": 2.5, "grad_norm": 9.13668155670166, "learning_rate": 1.4297403558567443e-07, "loss": 0.381, "step": 17256 }, { "epoch": 2.5, "grad_norm": 8.071585655212402, "learning_rate": 1.4289248754611716e-07, "loss": 0.3053, "step": 17257 }, { "epoch": 2.5, "grad_norm": 9.499671936035156, "learning_rate": 1.4281096097999555e-07, "loss": 0.3472, "step": 17258 }, { "epoch": 2.5, "grad_norm": 8.71422290802002, "learning_rate": 1.4272945588935293e-07, "loss": 0.3781, "step": 17259 }, { "epoch": 2.5, "grad_norm": 8.584331512451172, "learning_rate": 1.4264797227623039e-07, "loss": 0.3688, "step": 17260 }, { "epoch": 2.5, "grad_norm": 8.695368766784668, "learning_rate": 1.425665101426703e-07, "loss": 0.3795, "step": 17261 }, { "epoch": 2.5, "grad_norm": 8.09775447845459, "learning_rate": 1.4248506949071272e-07, "loss": 0.3428, "step": 17262 }, { "epoch": 2.5, "grad_norm": 7.718649864196777, "learning_rate": 1.4240365032239875e-07, "loss": 0.3199, "step": 17263 }, { "epoch": 2.5, "grad_norm": 9.865920066833496, "learning_rate": 1.4232225263976783e-07, "loss": 0.3801, "step": 17264 }, { "epoch": 2.51, "grad_norm": 8.548502922058105, "learning_rate": 1.4224087644485915e-07, "loss": 0.3694, "step": 17265 }, { "epoch": 2.51, "grad_norm": 8.878894805908203, "learning_rate": 1.4215952173971168e-07, "loss": 0.33, "step": 17266 }, { "epoch": 2.51, "grad_norm": 10.021821022033691, "learning_rate": 1.4207818852636343e-07, "loss": 0.3978, "step": 17267 }, { "epoch": 2.51, "grad_norm": 8.981520652770996, "learning_rate": 1.4199687680685235e-07, "loss": 0.3819, "step": 17268 }, { "epoch": 2.51, "grad_norm": 8.526944160461426, "learning_rate": 1.4191558658321534e-07, "loss": 0.3323, "step": 17269 }, { "epoch": 2.51, "grad_norm": 8.49893569946289, "learning_rate": 1.41834317857489e-07, "loss": 0.3985, "step": 17270 }, { "epoch": 2.51, "grad_norm": 9.169515609741211, "learning_rate": 1.417530706317095e-07, "loss": 0.436, "step": 17271 }, { "epoch": 2.51, "grad_norm": 8.68726921081543, "learning_rate": 1.416718449079124e-07, "loss": 0.3384, "step": 17272 }, { "epoch": 2.51, "grad_norm": 8.480862617492676, "learning_rate": 1.4159064068813264e-07, "loss": 0.381, "step": 17273 }, { "epoch": 2.51, "grad_norm": 9.968530654907227, "learning_rate": 1.4150945797440427e-07, "loss": 0.3654, "step": 17274 }, { "epoch": 2.51, "grad_norm": 8.480015754699707, "learning_rate": 1.41428296768762e-07, "loss": 0.3512, "step": 17275 }, { "epoch": 2.51, "grad_norm": 8.051847457885742, "learning_rate": 1.413471570732382e-07, "loss": 0.3243, "step": 17276 }, { "epoch": 2.51, "grad_norm": 7.687819957733154, "learning_rate": 1.4126603888986676e-07, "loss": 0.3191, "step": 17277 }, { "epoch": 2.51, "grad_norm": 8.660768508911133, "learning_rate": 1.411849422206791e-07, "loss": 0.4344, "step": 17278 }, { "epoch": 2.51, "grad_norm": 9.339080810546875, "learning_rate": 1.4110386706770738e-07, "loss": 0.3724, "step": 17279 }, { "epoch": 2.51, "grad_norm": 8.655035972595215, "learning_rate": 1.410228134329826e-07, "loss": 0.3281, "step": 17280 }, { "epoch": 2.51, "grad_norm": 8.065942764282227, "learning_rate": 1.4094178131853541e-07, "loss": 0.3294, "step": 17281 }, { "epoch": 2.51, "grad_norm": 9.20589542388916, "learning_rate": 1.4086077072639647e-07, "loss": 0.3863, "step": 17282 }, { "epoch": 2.51, "grad_norm": 7.827953815460205, "learning_rate": 1.407797816585945e-07, "loss": 0.3355, "step": 17283 }, { "epoch": 2.51, "grad_norm": 10.886292457580566, "learning_rate": 1.4069881411715945e-07, "loss": 0.3662, "step": 17284 }, { "epoch": 2.51, "grad_norm": 7.498947620391846, "learning_rate": 1.4061786810411924e-07, "loss": 0.341, "step": 17285 }, { "epoch": 2.51, "grad_norm": 8.510581970214844, "learning_rate": 1.4053694362150192e-07, "loss": 0.338, "step": 17286 }, { "epoch": 2.51, "grad_norm": 10.89301586151123, "learning_rate": 1.4045604067133508e-07, "loss": 0.4615, "step": 17287 }, { "epoch": 2.51, "grad_norm": 7.832448482513428, "learning_rate": 1.4037515925564548e-07, "loss": 0.3213, "step": 17288 }, { "epoch": 2.51, "grad_norm": 9.333617210388184, "learning_rate": 1.4029429937645953e-07, "loss": 0.3836, "step": 17289 }, { "epoch": 2.51, "grad_norm": 9.706062316894531, "learning_rate": 1.4021346103580301e-07, "loss": 0.4168, "step": 17290 }, { "epoch": 2.51, "grad_norm": 9.53205680847168, "learning_rate": 1.4013264423570127e-07, "loss": 0.3352, "step": 17291 }, { "epoch": 2.51, "grad_norm": 8.921524047851562, "learning_rate": 1.400518489781789e-07, "loss": 0.3742, "step": 17292 }, { "epoch": 2.51, "grad_norm": 9.193807601928711, "learning_rate": 1.3997107526526032e-07, "loss": 0.3347, "step": 17293 }, { "epoch": 2.51, "grad_norm": 9.185517311096191, "learning_rate": 1.398903230989692e-07, "loss": 0.3699, "step": 17294 }, { "epoch": 2.51, "grad_norm": 8.598628997802734, "learning_rate": 1.3980959248132796e-07, "loss": 0.3371, "step": 17295 }, { "epoch": 2.51, "grad_norm": 9.245506286621094, "learning_rate": 1.3972888341436028e-07, "loss": 0.3281, "step": 17296 }, { "epoch": 2.51, "grad_norm": 8.246764183044434, "learning_rate": 1.3964819590008715e-07, "loss": 0.315, "step": 17297 }, { "epoch": 2.51, "grad_norm": 8.26128101348877, "learning_rate": 1.39567529940531e-07, "loss": 0.3201, "step": 17298 }, { "epoch": 2.51, "grad_norm": 7.469770908355713, "learning_rate": 1.3948688553771215e-07, "loss": 0.3356, "step": 17299 }, { "epoch": 2.51, "grad_norm": 7.95970344543457, "learning_rate": 1.3940626269365118e-07, "loss": 0.3229, "step": 17300 }, { "epoch": 2.51, "grad_norm": 9.901293754577637, "learning_rate": 1.3932566141036796e-07, "loss": 0.3581, "step": 17301 }, { "epoch": 2.51, "grad_norm": 8.941608428955078, "learning_rate": 1.3924508168988182e-07, "loss": 0.4102, "step": 17302 }, { "epoch": 2.51, "grad_norm": 7.329614162445068, "learning_rate": 1.391645235342116e-07, "loss": 0.3078, "step": 17303 }, { "epoch": 2.51, "grad_norm": 8.677376747131348, "learning_rate": 1.390839869453756e-07, "loss": 0.3657, "step": 17304 }, { "epoch": 2.51, "grad_norm": 8.498429298400879, "learning_rate": 1.3900347192539141e-07, "loss": 0.3435, "step": 17305 }, { "epoch": 2.51, "grad_norm": 8.673702239990234, "learning_rate": 1.389229784762762e-07, "loss": 0.329, "step": 17306 }, { "epoch": 2.51, "grad_norm": 7.408113956451416, "learning_rate": 1.388425066000467e-07, "loss": 0.3427, "step": 17307 }, { "epoch": 2.51, "grad_norm": 8.013827323913574, "learning_rate": 1.3876205629871895e-07, "loss": 0.3144, "step": 17308 }, { "epoch": 2.51, "grad_norm": 8.40073299407959, "learning_rate": 1.3868162757430856e-07, "loss": 0.3854, "step": 17309 }, { "epoch": 2.51, "grad_norm": 8.678619384765625, "learning_rate": 1.3860122042883048e-07, "loss": 0.3144, "step": 17310 }, { "epoch": 2.51, "grad_norm": 8.998003005981445, "learning_rate": 1.3852083486429922e-07, "loss": 0.3875, "step": 17311 }, { "epoch": 2.51, "grad_norm": 8.519564628601074, "learning_rate": 1.384404708827287e-07, "loss": 0.3515, "step": 17312 }, { "epoch": 2.51, "grad_norm": 7.753181457519531, "learning_rate": 1.3836012848613242e-07, "loss": 0.347, "step": 17313 }, { "epoch": 2.51, "grad_norm": 9.145074844360352, "learning_rate": 1.382798076765227e-07, "loss": 0.3256, "step": 17314 }, { "epoch": 2.51, "grad_norm": 8.991294860839844, "learning_rate": 1.3819950845591278e-07, "loss": 0.3627, "step": 17315 }, { "epoch": 2.51, "grad_norm": 9.904051780700684, "learning_rate": 1.381192308263134e-07, "loss": 0.3766, "step": 17316 }, { "epoch": 2.51, "grad_norm": 9.227469444274902, "learning_rate": 1.380389747897367e-07, "loss": 0.3572, "step": 17317 }, { "epoch": 2.51, "grad_norm": 8.623760223388672, "learning_rate": 1.3795874034819244e-07, "loss": 0.3421, "step": 17318 }, { "epoch": 2.51, "grad_norm": 8.797776222229004, "learning_rate": 1.3787852750369179e-07, "loss": 0.3705, "step": 17319 }, { "epoch": 2.51, "grad_norm": 8.783241271972656, "learning_rate": 1.3779833625824367e-07, "loss": 0.3989, "step": 17320 }, { "epoch": 2.51, "grad_norm": 10.60370922088623, "learning_rate": 1.3771816661385728e-07, "loss": 0.3623, "step": 17321 }, { "epoch": 2.51, "grad_norm": 7.33352518081665, "learning_rate": 1.3763801857254109e-07, "loss": 0.3113, "step": 17322 }, { "epoch": 2.51, "grad_norm": 9.088756561279297, "learning_rate": 1.375578921363032e-07, "loss": 0.3566, "step": 17323 }, { "epoch": 2.51, "grad_norm": 9.955883979797363, "learning_rate": 1.3747778730715098e-07, "loss": 0.361, "step": 17324 }, { "epoch": 2.51, "grad_norm": 8.501056671142578, "learning_rate": 1.3739770408709138e-07, "loss": 0.3279, "step": 17325 }, { "epoch": 2.51, "grad_norm": 8.502361297607422, "learning_rate": 1.373176424781307e-07, "loss": 0.3466, "step": 17326 }, { "epoch": 2.51, "grad_norm": 9.4519681930542, "learning_rate": 1.372376024822748e-07, "loss": 0.3458, "step": 17327 }, { "epoch": 2.51, "grad_norm": 9.089983940124512, "learning_rate": 1.3715758410152888e-07, "loss": 0.3658, "step": 17328 }, { "epoch": 2.51, "grad_norm": 7.677693843841553, "learning_rate": 1.3707758733789798e-07, "loss": 0.3151, "step": 17329 }, { "epoch": 2.51, "grad_norm": 8.494152069091797, "learning_rate": 1.3699761219338558e-07, "loss": 0.334, "step": 17330 }, { "epoch": 2.51, "grad_norm": 7.821234703063965, "learning_rate": 1.3691765866999616e-07, "loss": 0.3163, "step": 17331 }, { "epoch": 2.51, "grad_norm": 8.964548110961914, "learning_rate": 1.3683772676973204e-07, "loss": 0.3516, "step": 17332 }, { "epoch": 2.51, "grad_norm": 8.210251808166504, "learning_rate": 1.3675781649459673e-07, "loss": 0.3188, "step": 17333 }, { "epoch": 2.52, "grad_norm": 7.667718410491943, "learning_rate": 1.3667792784659138e-07, "loss": 0.2744, "step": 17334 }, { "epoch": 2.52, "grad_norm": 8.48856258392334, "learning_rate": 1.3659806082771752e-07, "loss": 0.3035, "step": 17335 }, { "epoch": 2.52, "grad_norm": 8.931163787841797, "learning_rate": 1.3651821543997688e-07, "loss": 0.3338, "step": 17336 }, { "epoch": 2.52, "grad_norm": 9.308414459228516, "learning_rate": 1.3643839168536886e-07, "loss": 0.3262, "step": 17337 }, { "epoch": 2.52, "grad_norm": 8.381362915039062, "learning_rate": 1.363585895658943e-07, "loss": 0.3339, "step": 17338 }, { "epoch": 2.52, "grad_norm": 8.808917999267578, "learning_rate": 1.3627880908355162e-07, "loss": 0.2855, "step": 17339 }, { "epoch": 2.52, "grad_norm": 7.834999084472656, "learning_rate": 1.361990502403403e-07, "loss": 0.3515, "step": 17340 }, { "epoch": 2.52, "grad_norm": 9.045442581176758, "learning_rate": 1.3611931303825807e-07, "loss": 0.3647, "step": 17341 }, { "epoch": 2.52, "grad_norm": 8.703245162963867, "learning_rate": 1.3603959747930282e-07, "loss": 0.3581, "step": 17342 }, { "epoch": 2.52, "grad_norm": 8.018598556518555, "learning_rate": 1.359599035654716e-07, "loss": 0.3472, "step": 17343 }, { "epoch": 2.52, "grad_norm": 8.667997360229492, "learning_rate": 1.3588023129876113e-07, "loss": 0.3152, "step": 17344 }, { "epoch": 2.52, "grad_norm": 8.472930908203125, "learning_rate": 1.358005806811674e-07, "loss": 0.3656, "step": 17345 }, { "epoch": 2.52, "grad_norm": 9.116432189941406, "learning_rate": 1.3572095171468588e-07, "loss": 0.3533, "step": 17346 }, { "epoch": 2.52, "grad_norm": 9.10795783996582, "learning_rate": 1.3564134440131168e-07, "loss": 0.3804, "step": 17347 }, { "epoch": 2.52, "grad_norm": 7.744917392730713, "learning_rate": 1.3556175874303943e-07, "loss": 0.3025, "step": 17348 }, { "epoch": 2.52, "grad_norm": 8.49083423614502, "learning_rate": 1.3548219474186228e-07, "loss": 0.3388, "step": 17349 }, { "epoch": 2.52, "grad_norm": 8.815483093261719, "learning_rate": 1.3540265239977445e-07, "loss": 0.3405, "step": 17350 }, { "epoch": 2.52, "grad_norm": 8.824299812316895, "learning_rate": 1.3532313171876796e-07, "loss": 0.3835, "step": 17351 }, { "epoch": 2.52, "grad_norm": 8.895404815673828, "learning_rate": 1.3524363270083583e-07, "loss": 0.434, "step": 17352 }, { "epoch": 2.52, "grad_norm": 8.362344741821289, "learning_rate": 1.3516415534796898e-07, "loss": 0.3756, "step": 17353 }, { "epoch": 2.52, "grad_norm": 11.174995422363281, "learning_rate": 1.3508469966215942e-07, "loss": 0.3754, "step": 17354 }, { "epoch": 2.52, "grad_norm": 8.602916717529297, "learning_rate": 1.3500526564539717e-07, "loss": 0.3775, "step": 17355 }, { "epoch": 2.52, "grad_norm": 8.57099437713623, "learning_rate": 1.3492585329967255e-07, "loss": 0.332, "step": 17356 }, { "epoch": 2.52, "grad_norm": 7.935879707336426, "learning_rate": 1.3484646262697508e-07, "loss": 0.3371, "step": 17357 }, { "epoch": 2.52, "grad_norm": 9.292218208312988, "learning_rate": 1.3476709362929373e-07, "loss": 0.3625, "step": 17358 }, { "epoch": 2.52, "grad_norm": 8.452406883239746, "learning_rate": 1.34687746308617e-07, "loss": 0.3582, "step": 17359 }, { "epoch": 2.52, "grad_norm": 9.753202438354492, "learning_rate": 1.3460842066693278e-07, "loss": 0.3429, "step": 17360 }, { "epoch": 2.52, "grad_norm": 8.558815956115723, "learning_rate": 1.3452911670622857e-07, "loss": 0.3132, "step": 17361 }, { "epoch": 2.52, "grad_norm": 9.19373607635498, "learning_rate": 1.3444983442849101e-07, "loss": 0.335, "step": 17362 }, { "epoch": 2.52, "grad_norm": 8.815698623657227, "learning_rate": 1.343705738357065e-07, "loss": 0.3496, "step": 17363 }, { "epoch": 2.52, "grad_norm": 8.879473686218262, "learning_rate": 1.3429133492986066e-07, "loss": 0.3223, "step": 17364 }, { "epoch": 2.52, "grad_norm": 10.024860382080078, "learning_rate": 1.3421211771293893e-07, "loss": 0.3977, "step": 17365 }, { "epoch": 2.52, "grad_norm": 8.623109817504883, "learning_rate": 1.3413292218692573e-07, "loss": 0.4133, "step": 17366 }, { "epoch": 2.52, "grad_norm": 7.950014591217041, "learning_rate": 1.3405374835380545e-07, "loss": 0.3088, "step": 17367 }, { "epoch": 2.52, "grad_norm": 9.106709480285645, "learning_rate": 1.3397459621556128e-07, "loss": 0.3943, "step": 17368 }, { "epoch": 2.52, "grad_norm": 9.701858520507812, "learning_rate": 1.3389546577417688e-07, "loss": 0.3472, "step": 17369 }, { "epoch": 2.52, "grad_norm": 9.650355339050293, "learning_rate": 1.3381635703163375e-07, "loss": 0.3632, "step": 17370 }, { "epoch": 2.52, "grad_norm": 7.52284574508667, "learning_rate": 1.3373726998991486e-07, "loss": 0.2756, "step": 17371 }, { "epoch": 2.52, "grad_norm": 8.473102569580078, "learning_rate": 1.3365820465100064e-07, "loss": 0.3846, "step": 17372 }, { "epoch": 2.52, "grad_norm": 8.744194030761719, "learning_rate": 1.3357916101687293e-07, "loss": 0.3935, "step": 17373 }, { "epoch": 2.52, "grad_norm": 7.999208927154541, "learning_rate": 1.3350013908951119e-07, "loss": 0.3073, "step": 17374 }, { "epoch": 2.52, "grad_norm": 7.707830905914307, "learning_rate": 1.334211388708959e-07, "loss": 0.3102, "step": 17375 }, { "epoch": 2.52, "grad_norm": 9.496925354003906, "learning_rate": 1.333421603630057e-07, "loss": 0.3612, "step": 17376 }, { "epoch": 2.52, "grad_norm": 9.235910415649414, "learning_rate": 1.3326320356781962e-07, "loss": 0.3899, "step": 17377 }, { "epoch": 2.52, "grad_norm": 7.690380096435547, "learning_rate": 1.3318426848731555e-07, "loss": 0.336, "step": 17378 }, { "epoch": 2.52, "grad_norm": 8.382437705993652, "learning_rate": 1.3310535512347122e-07, "loss": 0.3319, "step": 17379 }, { "epoch": 2.52, "grad_norm": 9.589699745178223, "learning_rate": 1.3302646347826374e-07, "loss": 0.3622, "step": 17380 }, { "epoch": 2.52, "grad_norm": 7.701093673706055, "learning_rate": 1.3294759355366948e-07, "loss": 0.3156, "step": 17381 }, { "epoch": 2.52, "grad_norm": 8.922517776489258, "learning_rate": 1.3286874535166447e-07, "loss": 0.2947, "step": 17382 }, { "epoch": 2.52, "grad_norm": 7.686730861663818, "learning_rate": 1.327899188742241e-07, "loss": 0.3246, "step": 17383 }, { "epoch": 2.52, "grad_norm": 8.2571382522583, "learning_rate": 1.3271111412332315e-07, "loss": 0.3602, "step": 17384 }, { "epoch": 2.52, "grad_norm": 8.70617961883545, "learning_rate": 1.3263233110093642e-07, "loss": 0.2913, "step": 17385 }, { "epoch": 2.52, "grad_norm": 8.53258991241455, "learning_rate": 1.3255356980903675e-07, "loss": 0.3099, "step": 17386 }, { "epoch": 2.52, "grad_norm": 9.651165962219238, "learning_rate": 1.3247483024959826e-07, "loss": 0.3636, "step": 17387 }, { "epoch": 2.52, "grad_norm": 8.587016105651855, "learning_rate": 1.3239611242459326e-07, "loss": 0.3436, "step": 17388 }, { "epoch": 2.52, "grad_norm": 8.461915016174316, "learning_rate": 1.3231741633599392e-07, "loss": 0.3631, "step": 17389 }, { "epoch": 2.52, "grad_norm": 7.899376392364502, "learning_rate": 1.322387419857721e-07, "loss": 0.3351, "step": 17390 }, { "epoch": 2.52, "grad_norm": 8.1172513961792, "learning_rate": 1.3216008937589829e-07, "loss": 0.3778, "step": 17391 }, { "epoch": 2.52, "grad_norm": 7.614088535308838, "learning_rate": 1.3208145850834372e-07, "loss": 0.3346, "step": 17392 }, { "epoch": 2.52, "grad_norm": 8.984153747558594, "learning_rate": 1.3200284938507756e-07, "loss": 0.3984, "step": 17393 }, { "epoch": 2.52, "grad_norm": 8.008245468139648, "learning_rate": 1.3192426200807006e-07, "loss": 0.3585, "step": 17394 }, { "epoch": 2.52, "grad_norm": 8.591160774230957, "learning_rate": 1.3184569637928956e-07, "loss": 0.3145, "step": 17395 }, { "epoch": 2.52, "grad_norm": 8.93398666381836, "learning_rate": 1.3176715250070448e-07, "loss": 0.3287, "step": 17396 }, { "epoch": 2.52, "grad_norm": 9.406135559082031, "learning_rate": 1.3168863037428267e-07, "loss": 0.4497, "step": 17397 }, { "epoch": 2.52, "grad_norm": 10.460617065429688, "learning_rate": 1.316101300019914e-07, "loss": 0.376, "step": 17398 }, { "epoch": 2.52, "grad_norm": 8.769718170166016, "learning_rate": 1.3153165138579724e-07, "loss": 0.3146, "step": 17399 }, { "epoch": 2.52, "grad_norm": 8.286026954650879, "learning_rate": 1.3145319452766655e-07, "loss": 0.34, "step": 17400 }, { "epoch": 2.52, "grad_norm": 8.834136962890625, "learning_rate": 1.3137475942956476e-07, "loss": 0.283, "step": 17401 }, { "epoch": 2.52, "grad_norm": 8.724014282226562, "learning_rate": 1.3129634609345709e-07, "loss": 0.3071, "step": 17402 }, { "epoch": 2.53, "grad_norm": 7.87667989730835, "learning_rate": 1.312179545213079e-07, "loss": 0.3173, "step": 17403 }, { "epoch": 2.53, "grad_norm": 10.039811134338379, "learning_rate": 1.3113958471508146e-07, "loss": 0.4367, "step": 17404 }, { "epoch": 2.53, "grad_norm": 8.486745834350586, "learning_rate": 1.3106123667674062e-07, "loss": 0.3594, "step": 17405 }, { "epoch": 2.53, "grad_norm": 7.605478286743164, "learning_rate": 1.3098291040824894e-07, "loss": 0.3463, "step": 17406 }, { "epoch": 2.53, "grad_norm": 8.577645301818848, "learning_rate": 1.3090460591156803e-07, "loss": 0.3765, "step": 17407 }, { "epoch": 2.53, "grad_norm": 8.7284574508667, "learning_rate": 1.308263231886606e-07, "loss": 0.3025, "step": 17408 }, { "epoch": 2.53, "grad_norm": 8.119940757751465, "learning_rate": 1.3074806224148694e-07, "loss": 0.3477, "step": 17409 }, { "epoch": 2.53, "grad_norm": 9.685432434082031, "learning_rate": 1.3066982307200858e-07, "loss": 0.4088, "step": 17410 }, { "epoch": 2.53, "grad_norm": 9.969549179077148, "learning_rate": 1.3059160568218507e-07, "loss": 0.3808, "step": 17411 }, { "epoch": 2.53, "grad_norm": 7.85188627243042, "learning_rate": 1.3051341007397597e-07, "loss": 0.3033, "step": 17412 }, { "epoch": 2.53, "grad_norm": 9.236332893371582, "learning_rate": 1.3043523624934117e-07, "loss": 0.3132, "step": 17413 }, { "epoch": 2.53, "grad_norm": 10.07291030883789, "learning_rate": 1.3035708421023805e-07, "loss": 0.3863, "step": 17414 }, { "epoch": 2.53, "grad_norm": 8.345135688781738, "learning_rate": 1.3027895395862577e-07, "loss": 0.3643, "step": 17415 }, { "epoch": 2.53, "grad_norm": 9.725407600402832, "learning_rate": 1.3020084549646082e-07, "loss": 0.4046, "step": 17416 }, { "epoch": 2.53, "grad_norm": 8.56461238861084, "learning_rate": 1.3012275882570045e-07, "loss": 0.3613, "step": 17417 }, { "epoch": 2.53, "grad_norm": 8.131009101867676, "learning_rate": 1.3004469394830087e-07, "loss": 0.335, "step": 17418 }, { "epoch": 2.53, "grad_norm": 7.7618088722229, "learning_rate": 1.2996665086621805e-07, "loss": 0.3186, "step": 17419 }, { "epoch": 2.53, "grad_norm": 8.698248863220215, "learning_rate": 1.298886295814071e-07, "loss": 0.3988, "step": 17420 }, { "epoch": 2.53, "grad_norm": 8.080330848693848, "learning_rate": 1.2981063009582283e-07, "loss": 0.3383, "step": 17421 }, { "epoch": 2.53, "grad_norm": 9.47217082977295, "learning_rate": 1.2973265241141917e-07, "loss": 0.3909, "step": 17422 }, { "epoch": 2.53, "grad_norm": 7.93804407119751, "learning_rate": 1.2965469653015005e-07, "loss": 0.3311, "step": 17423 }, { "epoch": 2.53, "grad_norm": 10.697088241577148, "learning_rate": 1.2957676245396821e-07, "loss": 0.4312, "step": 17424 }, { "epoch": 2.53, "grad_norm": 8.942277908325195, "learning_rate": 1.2949885018482653e-07, "loss": 0.4299, "step": 17425 }, { "epoch": 2.53, "grad_norm": 7.5828704833984375, "learning_rate": 1.2942095972467638e-07, "loss": 0.3146, "step": 17426 }, { "epoch": 2.53, "grad_norm": 8.822711944580078, "learning_rate": 1.2934309107546993e-07, "loss": 0.4021, "step": 17427 }, { "epoch": 2.53, "grad_norm": 8.820808410644531, "learning_rate": 1.2926524423915718e-07, "loss": 0.4068, "step": 17428 }, { "epoch": 2.53, "grad_norm": 8.720962524414062, "learning_rate": 1.291874192176894e-07, "loss": 0.3742, "step": 17429 }, { "epoch": 2.53, "grad_norm": 7.9487996101379395, "learning_rate": 1.2910961601301551e-07, "loss": 0.3558, "step": 17430 }, { "epoch": 2.53, "grad_norm": 8.514659881591797, "learning_rate": 1.2903183462708556e-07, "loss": 0.3409, "step": 17431 }, { "epoch": 2.53, "grad_norm": 8.832014083862305, "learning_rate": 1.2895407506184763e-07, "loss": 0.3444, "step": 17432 }, { "epoch": 2.53, "grad_norm": 8.838210105895996, "learning_rate": 1.2887633731924997e-07, "loss": 0.37, "step": 17433 }, { "epoch": 2.53, "grad_norm": 7.683141708374023, "learning_rate": 1.2879862140124032e-07, "loss": 0.3552, "step": 17434 }, { "epoch": 2.53, "grad_norm": 8.475945472717285, "learning_rate": 1.287209273097657e-07, "loss": 0.3605, "step": 17435 }, { "epoch": 2.53, "grad_norm": 10.08822250366211, "learning_rate": 1.286432550467724e-07, "loss": 0.3773, "step": 17436 }, { "epoch": 2.53, "grad_norm": 8.569703102111816, "learning_rate": 1.2856560461420673e-07, "loss": 0.334, "step": 17437 }, { "epoch": 2.53, "grad_norm": 7.968567371368408, "learning_rate": 1.2848797601401384e-07, "loss": 0.3693, "step": 17438 }, { "epoch": 2.53, "grad_norm": 7.8809051513671875, "learning_rate": 1.2841036924813863e-07, "loss": 0.3589, "step": 17439 }, { "epoch": 2.53, "grad_norm": 8.024633407592773, "learning_rate": 1.2833278431852547e-07, "loss": 0.314, "step": 17440 }, { "epoch": 2.53, "grad_norm": 8.478407859802246, "learning_rate": 1.2825522122711795e-07, "loss": 0.3332, "step": 17441 }, { "epoch": 2.53, "grad_norm": 9.133865356445312, "learning_rate": 1.281776799758596e-07, "loss": 0.3776, "step": 17442 }, { "epoch": 2.53, "grad_norm": 9.221612930297852, "learning_rate": 1.2810016056669282e-07, "loss": 0.3064, "step": 17443 }, { "epoch": 2.53, "grad_norm": 9.465666770935059, "learning_rate": 1.2802266300155984e-07, "loss": 0.3953, "step": 17444 }, { "epoch": 2.53, "grad_norm": 8.477766036987305, "learning_rate": 1.2794518728240223e-07, "loss": 0.3648, "step": 17445 }, { "epoch": 2.53, "grad_norm": 9.362730979919434, "learning_rate": 1.278677334111613e-07, "loss": 0.3807, "step": 17446 }, { "epoch": 2.53, "grad_norm": 9.762528419494629, "learning_rate": 1.2779030138977676e-07, "loss": 0.3905, "step": 17447 }, { "epoch": 2.53, "grad_norm": 8.74634838104248, "learning_rate": 1.2771289122018947e-07, "loss": 0.3603, "step": 17448 }, { "epoch": 2.53, "grad_norm": 9.04433536529541, "learning_rate": 1.276355029043379e-07, "loss": 0.4233, "step": 17449 }, { "epoch": 2.53, "grad_norm": 10.25918197631836, "learning_rate": 1.275581364441618e-07, "loss": 0.3653, "step": 17450 }, { "epoch": 2.53, "grad_norm": 9.201695442199707, "learning_rate": 1.2748079184159877e-07, "loss": 0.3977, "step": 17451 }, { "epoch": 2.53, "grad_norm": 9.502368927001953, "learning_rate": 1.274034690985869e-07, "loss": 0.4299, "step": 17452 }, { "epoch": 2.53, "grad_norm": 10.561675071716309, "learning_rate": 1.2732616821706322e-07, "loss": 0.3802, "step": 17453 }, { "epoch": 2.53, "grad_norm": 8.447022438049316, "learning_rate": 1.272488891989645e-07, "loss": 0.3772, "step": 17454 }, { "epoch": 2.53, "grad_norm": 8.801128387451172, "learning_rate": 1.2717163204622673e-07, "loss": 0.3624, "step": 17455 }, { "epoch": 2.53, "grad_norm": 8.89591121673584, "learning_rate": 1.2709439676078558e-07, "loss": 0.3456, "step": 17456 }, { "epoch": 2.53, "grad_norm": 7.756758689880371, "learning_rate": 1.27017183344576e-07, "loss": 0.3232, "step": 17457 }, { "epoch": 2.53, "grad_norm": 7.889140605926514, "learning_rate": 1.2693999179953242e-07, "loss": 0.3016, "step": 17458 }, { "epoch": 2.53, "grad_norm": 9.392217636108398, "learning_rate": 1.2686282212758882e-07, "loss": 0.3605, "step": 17459 }, { "epoch": 2.53, "grad_norm": 9.277621269226074, "learning_rate": 1.2678567433067866e-07, "loss": 0.3249, "step": 17460 }, { "epoch": 2.53, "grad_norm": 8.595612525939941, "learning_rate": 1.267085484107342e-07, "loss": 0.3669, "step": 17461 }, { "epoch": 2.53, "grad_norm": 8.317096710205078, "learning_rate": 1.2663144436968852e-07, "loss": 0.3625, "step": 17462 }, { "epoch": 2.53, "grad_norm": 8.79654598236084, "learning_rate": 1.2655436220947246e-07, "loss": 0.2979, "step": 17463 }, { "epoch": 2.53, "grad_norm": 9.017326354980469, "learning_rate": 1.26477301932018e-07, "loss": 0.363, "step": 17464 }, { "epoch": 2.53, "grad_norm": 9.289974212646484, "learning_rate": 1.2640026353925526e-07, "loss": 0.3533, "step": 17465 }, { "epoch": 2.53, "grad_norm": 8.85272216796875, "learning_rate": 1.2632324703311458e-07, "loss": 0.3553, "step": 17466 }, { "epoch": 2.53, "grad_norm": 7.9312872886657715, "learning_rate": 1.2624625241552556e-07, "loss": 0.3671, "step": 17467 }, { "epoch": 2.53, "grad_norm": 7.9561052322387695, "learning_rate": 1.261692796884165e-07, "loss": 0.3114, "step": 17468 }, { "epoch": 2.53, "grad_norm": 9.726473808288574, "learning_rate": 1.260923288537168e-07, "loss": 0.3773, "step": 17469 }, { "epoch": 2.53, "grad_norm": 8.359968185424805, "learning_rate": 1.260153999133534e-07, "loss": 0.3662, "step": 17470 }, { "epoch": 2.53, "grad_norm": 8.70760726928711, "learning_rate": 1.259384928692545e-07, "loss": 0.3419, "step": 17471 }, { "epoch": 2.54, "grad_norm": 10.26504135131836, "learning_rate": 1.258616077233463e-07, "loss": 0.4315, "step": 17472 }, { "epoch": 2.54, "grad_norm": 8.76320743560791, "learning_rate": 1.2578474447755517e-07, "loss": 0.3765, "step": 17473 }, { "epoch": 2.54, "grad_norm": 9.998828887939453, "learning_rate": 1.2570790313380674e-07, "loss": 0.4069, "step": 17474 }, { "epoch": 2.54, "grad_norm": 8.989015579223633, "learning_rate": 1.256310836940263e-07, "loss": 0.3491, "step": 17475 }, { "epoch": 2.54, "grad_norm": 8.600976943969727, "learning_rate": 1.255542861601384e-07, "loss": 0.3458, "step": 17476 }, { "epoch": 2.54, "grad_norm": 7.464319229125977, "learning_rate": 1.2547751053406696e-07, "loss": 0.3177, "step": 17477 }, { "epoch": 2.54, "grad_norm": 8.510132789611816, "learning_rate": 1.2540075681773555e-07, "loss": 0.3048, "step": 17478 }, { "epoch": 2.54, "grad_norm": 8.416899681091309, "learning_rate": 1.2532402501306715e-07, "loss": 0.3506, "step": 17479 }, { "epoch": 2.54, "grad_norm": 9.770837783813477, "learning_rate": 1.2524731512198406e-07, "loss": 0.4103, "step": 17480 }, { "epoch": 2.54, "grad_norm": 8.994124412536621, "learning_rate": 1.2517062714640835e-07, "loss": 0.36, "step": 17481 }, { "epoch": 2.54, "grad_norm": 9.440013885498047, "learning_rate": 1.2509396108826077e-07, "loss": 0.3674, "step": 17482 }, { "epoch": 2.54, "grad_norm": 8.195449829101562, "learning_rate": 1.2501731694946284e-07, "loss": 0.3775, "step": 17483 }, { "epoch": 2.54, "grad_norm": 8.246158599853516, "learning_rate": 1.249406947319338e-07, "loss": 0.375, "step": 17484 }, { "epoch": 2.54, "grad_norm": 7.460627555847168, "learning_rate": 1.2486409443759437e-07, "loss": 0.3335, "step": 17485 }, { "epoch": 2.54, "grad_norm": 8.91550064086914, "learning_rate": 1.247875160683629e-07, "loss": 0.3228, "step": 17486 }, { "epoch": 2.54, "grad_norm": 9.125823020935059, "learning_rate": 1.2471095962615806e-07, "loss": 0.3895, "step": 17487 }, { "epoch": 2.54, "grad_norm": 8.658758163452148, "learning_rate": 1.2463442511289801e-07, "loss": 0.3081, "step": 17488 }, { "epoch": 2.54, "grad_norm": 7.492929458618164, "learning_rate": 1.2455791253050006e-07, "loss": 0.3233, "step": 17489 }, { "epoch": 2.54, "grad_norm": 8.840798377990723, "learning_rate": 1.244814218808813e-07, "loss": 0.362, "step": 17490 }, { "epoch": 2.54, "grad_norm": 8.025614738464355, "learning_rate": 1.2440495316595757e-07, "loss": 0.3207, "step": 17491 }, { "epoch": 2.54, "grad_norm": 9.385565757751465, "learning_rate": 1.243285063876456e-07, "loss": 0.3758, "step": 17492 }, { "epoch": 2.54, "grad_norm": 8.815117835998535, "learning_rate": 1.2425208154785982e-07, "loss": 0.3812, "step": 17493 }, { "epoch": 2.54, "grad_norm": 8.186671257019043, "learning_rate": 1.241756786485152e-07, "loss": 0.3659, "step": 17494 }, { "epoch": 2.54, "grad_norm": 8.936150550842285, "learning_rate": 1.2409929769152582e-07, "loss": 0.3874, "step": 17495 }, { "epoch": 2.54, "grad_norm": 8.775851249694824, "learning_rate": 1.240229386788054e-07, "loss": 0.3174, "step": 17496 }, { "epoch": 2.54, "grad_norm": 10.066577911376953, "learning_rate": 1.239466016122669e-07, "loss": 0.3623, "step": 17497 }, { "epoch": 2.54, "grad_norm": 7.278930187225342, "learning_rate": 1.2387028649382292e-07, "loss": 0.2757, "step": 17498 }, { "epoch": 2.54, "grad_norm": 8.614450454711914, "learning_rate": 1.237939933253853e-07, "loss": 0.3847, "step": 17499 }, { "epoch": 2.54, "grad_norm": 9.381896018981934, "learning_rate": 1.2371772210886557e-07, "loss": 0.3891, "step": 17500 }, { "epoch": 2.54, "grad_norm": 8.442447662353516, "learning_rate": 1.2364147284617443e-07, "loss": 0.3179, "step": 17501 }, { "epoch": 2.54, "grad_norm": 8.950878143310547, "learning_rate": 1.2356524553922255e-07, "loss": 0.3898, "step": 17502 }, { "epoch": 2.54, "grad_norm": 9.750527381896973, "learning_rate": 1.234890401899189e-07, "loss": 0.4361, "step": 17503 }, { "epoch": 2.54, "grad_norm": 9.084077835083008, "learning_rate": 1.2341285680017355e-07, "loss": 0.3746, "step": 17504 }, { "epoch": 2.54, "grad_norm": 8.366337776184082, "learning_rate": 1.233366953718944e-07, "loss": 0.3673, "step": 17505 }, { "epoch": 2.54, "grad_norm": 8.690202713012695, "learning_rate": 1.2326055590699037e-07, "loss": 0.3204, "step": 17506 }, { "epoch": 2.54, "grad_norm": 7.952086925506592, "learning_rate": 1.2318443840736824e-07, "loss": 0.3265, "step": 17507 }, { "epoch": 2.54, "grad_norm": 8.34119987487793, "learning_rate": 1.2310834287493545e-07, "loss": 0.3184, "step": 17508 }, { "epoch": 2.54, "grad_norm": 7.67303466796875, "learning_rate": 1.2303226931159826e-07, "loss": 0.3575, "step": 17509 }, { "epoch": 2.54, "grad_norm": 9.183072090148926, "learning_rate": 1.2295621771926268e-07, "loss": 0.3871, "step": 17510 }, { "epoch": 2.54, "grad_norm": 7.93086051940918, "learning_rate": 1.2288018809983402e-07, "loss": 0.281, "step": 17511 }, { "epoch": 2.54, "grad_norm": 9.801719665527344, "learning_rate": 1.22804180455217e-07, "loss": 0.3909, "step": 17512 }, { "epoch": 2.54, "grad_norm": 8.938435554504395, "learning_rate": 1.22728194787316e-07, "loss": 0.366, "step": 17513 }, { "epoch": 2.54, "grad_norm": 8.356825828552246, "learning_rate": 1.226522310980347e-07, "loss": 0.3698, "step": 17514 }, { "epoch": 2.54, "grad_norm": 8.978590965270996, "learning_rate": 1.2257628938927621e-07, "loss": 0.3245, "step": 17515 }, { "epoch": 2.54, "grad_norm": 7.979997158050537, "learning_rate": 1.2250036966294307e-07, "loss": 0.3207, "step": 17516 }, { "epoch": 2.54, "grad_norm": 8.221299171447754, "learning_rate": 1.2242447192093742e-07, "loss": 0.3534, "step": 17517 }, { "epoch": 2.54, "grad_norm": 8.593649864196777, "learning_rate": 1.223485961651607e-07, "loss": 0.3627, "step": 17518 }, { "epoch": 2.54, "grad_norm": 10.03043270111084, "learning_rate": 1.2227274239751385e-07, "loss": 0.3483, "step": 17519 }, { "epoch": 2.54, "grad_norm": 9.484777450561523, "learning_rate": 1.2219691061989734e-07, "loss": 0.3839, "step": 17520 }, { "epoch": 2.54, "grad_norm": 8.18248176574707, "learning_rate": 1.221211008342109e-07, "loss": 0.3147, "step": 17521 }, { "epoch": 2.54, "grad_norm": 8.27529239654541, "learning_rate": 1.2204531304235387e-07, "loss": 0.3314, "step": 17522 }, { "epoch": 2.54, "grad_norm": 9.91810417175293, "learning_rate": 1.2196954724622522e-07, "loss": 0.3954, "step": 17523 }, { "epoch": 2.54, "grad_norm": 8.188478469848633, "learning_rate": 1.218938034477225e-07, "loss": 0.2876, "step": 17524 }, { "epoch": 2.54, "grad_norm": 8.055784225463867, "learning_rate": 1.218180816487442e-07, "loss": 0.3503, "step": 17525 }, { "epoch": 2.54, "grad_norm": 8.564823150634766, "learning_rate": 1.2174238185118657e-07, "loss": 0.3204, "step": 17526 }, { "epoch": 2.54, "grad_norm": 8.436948776245117, "learning_rate": 1.216667040569469e-07, "loss": 0.3108, "step": 17527 }, { "epoch": 2.54, "grad_norm": 8.4968900680542, "learning_rate": 1.2159104826792066e-07, "loss": 0.3717, "step": 17528 }, { "epoch": 2.54, "grad_norm": 9.29624080657959, "learning_rate": 1.2151541448600334e-07, "loss": 0.3859, "step": 17529 }, { "epoch": 2.54, "grad_norm": 9.699028015136719, "learning_rate": 1.2143980271309007e-07, "loss": 0.3187, "step": 17530 }, { "epoch": 2.54, "grad_norm": 9.660619735717773, "learning_rate": 1.213642129510749e-07, "loss": 0.3966, "step": 17531 }, { "epoch": 2.54, "grad_norm": 9.131281852722168, "learning_rate": 1.2128864520185177e-07, "loss": 0.3539, "step": 17532 }, { "epoch": 2.54, "grad_norm": 9.320475578308105, "learning_rate": 1.2121309946731396e-07, "loss": 0.3687, "step": 17533 }, { "epoch": 2.54, "grad_norm": 8.796902656555176, "learning_rate": 1.21137575749354e-07, "loss": 0.374, "step": 17534 }, { "epoch": 2.54, "grad_norm": 8.744479179382324, "learning_rate": 1.2106207404986412e-07, "loss": 0.3441, "step": 17535 }, { "epoch": 2.54, "grad_norm": 8.422537803649902, "learning_rate": 1.2098659437073577e-07, "loss": 0.3475, "step": 17536 }, { "epoch": 2.54, "grad_norm": 8.071372985839844, "learning_rate": 1.2091113671386032e-07, "loss": 0.3348, "step": 17537 }, { "epoch": 2.54, "grad_norm": 8.282939910888672, "learning_rate": 1.208357010811276e-07, "loss": 0.3192, "step": 17538 }, { "epoch": 2.54, "grad_norm": 9.048598289489746, "learning_rate": 1.2076028747442822e-07, "loss": 0.39, "step": 17539 }, { "epoch": 2.54, "grad_norm": 9.02399730682373, "learning_rate": 1.206848958956509e-07, "loss": 0.3661, "step": 17540 }, { "epoch": 2.55, "grad_norm": 8.846229553222656, "learning_rate": 1.2060952634668507e-07, "loss": 0.4036, "step": 17541 }, { "epoch": 2.55, "grad_norm": 8.466193199157715, "learning_rate": 1.2053417882941863e-07, "loss": 0.3596, "step": 17542 }, { "epoch": 2.55, "grad_norm": 9.095664024353027, "learning_rate": 1.2045885334573903e-07, "loss": 0.3327, "step": 17543 }, { "epoch": 2.55, "grad_norm": 8.262216567993164, "learning_rate": 1.203835498975343e-07, "loss": 0.3297, "step": 17544 }, { "epoch": 2.55, "grad_norm": 7.867456436157227, "learning_rate": 1.2030826848668995e-07, "loss": 0.2895, "step": 17545 }, { "epoch": 2.55, "grad_norm": 8.646540641784668, "learning_rate": 1.2023300911509303e-07, "loss": 0.3429, "step": 17546 }, { "epoch": 2.55, "grad_norm": 7.022187232971191, "learning_rate": 1.2015777178462827e-07, "loss": 0.3188, "step": 17547 }, { "epoch": 2.55, "grad_norm": 9.081356048583984, "learning_rate": 1.2008255649718124e-07, "loss": 0.4235, "step": 17548 }, { "epoch": 2.55, "grad_norm": 8.802451133728027, "learning_rate": 1.200073632546359e-07, "loss": 0.3593, "step": 17549 }, { "epoch": 2.55, "grad_norm": 7.812537670135498, "learning_rate": 1.1993219205887628e-07, "loss": 0.316, "step": 17550 }, { "epoch": 2.55, "grad_norm": 8.548802375793457, "learning_rate": 1.1985704291178555e-07, "loss": 0.3404, "step": 17551 }, { "epoch": 2.55, "grad_norm": 7.791016101837158, "learning_rate": 1.1978191581524654e-07, "loss": 0.3319, "step": 17552 }, { "epoch": 2.55, "grad_norm": 8.612530708312988, "learning_rate": 1.1970681077114142e-07, "loss": 0.3193, "step": 17553 }, { "epoch": 2.55, "grad_norm": 9.37961196899414, "learning_rate": 1.1963172778135188e-07, "loss": 0.3891, "step": 17554 }, { "epoch": 2.55, "grad_norm": 9.673797607421875, "learning_rate": 1.195566668477589e-07, "loss": 0.3379, "step": 17555 }, { "epoch": 2.55, "grad_norm": 9.483976364135742, "learning_rate": 1.1948162797224303e-07, "loss": 0.4257, "step": 17556 }, { "epoch": 2.55, "grad_norm": 8.180371284484863, "learning_rate": 1.194066111566844e-07, "loss": 0.3168, "step": 17557 }, { "epoch": 2.55, "grad_norm": 8.179256439208984, "learning_rate": 1.193316164029624e-07, "loss": 0.3026, "step": 17558 }, { "epoch": 2.55, "grad_norm": 10.463868141174316, "learning_rate": 1.1925664371295552e-07, "loss": 0.3514, "step": 17559 }, { "epoch": 2.55, "grad_norm": 9.14427661895752, "learning_rate": 1.1918169308854276e-07, "loss": 0.3838, "step": 17560 }, { "epoch": 2.55, "grad_norm": 8.97406005859375, "learning_rate": 1.1910676453160096e-07, "loss": 0.3769, "step": 17561 }, { "epoch": 2.55, "grad_norm": 8.75338363647461, "learning_rate": 1.190318580440084e-07, "loss": 0.3329, "step": 17562 }, { "epoch": 2.55, "grad_norm": 10.223594665527344, "learning_rate": 1.1895697362764102e-07, "loss": 0.3629, "step": 17563 }, { "epoch": 2.55, "grad_norm": 8.362436294555664, "learning_rate": 1.1888211128437508e-07, "loss": 0.3449, "step": 17564 }, { "epoch": 2.55, "grad_norm": 7.930050849914551, "learning_rate": 1.1880727101608613e-07, "loss": 0.2928, "step": 17565 }, { "epoch": 2.55, "grad_norm": 8.282466888427734, "learning_rate": 1.1873245282464927e-07, "loss": 0.3478, "step": 17566 }, { "epoch": 2.55, "grad_norm": 8.651960372924805, "learning_rate": 1.1865765671193895e-07, "loss": 0.3295, "step": 17567 }, { "epoch": 2.55, "grad_norm": 9.165781021118164, "learning_rate": 1.1858288267982897e-07, "loss": 0.4172, "step": 17568 }, { "epoch": 2.55, "grad_norm": 9.977267265319824, "learning_rate": 1.1850813073019261e-07, "loss": 0.3532, "step": 17569 }, { "epoch": 2.55, "grad_norm": 8.987313270568848, "learning_rate": 1.1843340086490284e-07, "loss": 0.3347, "step": 17570 }, { "epoch": 2.55, "grad_norm": 9.5743989944458, "learning_rate": 1.183586930858319e-07, "loss": 0.3637, "step": 17571 }, { "epoch": 2.55, "grad_norm": 7.987273216247559, "learning_rate": 1.1828400739485123e-07, "loss": 0.3337, "step": 17572 }, { "epoch": 2.55, "grad_norm": 8.03312873840332, "learning_rate": 1.182093437938323e-07, "loss": 0.3199, "step": 17573 }, { "epoch": 2.55, "grad_norm": 8.397936820983887, "learning_rate": 1.1813470228464539e-07, "loss": 0.3472, "step": 17574 }, { "epoch": 2.55, "grad_norm": 8.487764358520508, "learning_rate": 1.1806008286916058e-07, "loss": 0.3579, "step": 17575 }, { "epoch": 2.55, "grad_norm": 8.58103084564209, "learning_rate": 1.1798548554924748e-07, "loss": 0.3607, "step": 17576 }, { "epoch": 2.55, "grad_norm": 7.908242225646973, "learning_rate": 1.1791091032677514e-07, "loss": 0.2953, "step": 17577 }, { "epoch": 2.55, "grad_norm": 9.911052703857422, "learning_rate": 1.1783635720361118e-07, "loss": 0.3814, "step": 17578 }, { "epoch": 2.55, "grad_norm": 9.0360746383667, "learning_rate": 1.1776182618162444e-07, "loss": 0.3584, "step": 17579 }, { "epoch": 2.55, "grad_norm": 8.878496170043945, "learning_rate": 1.1768731726268122e-07, "loss": 0.3986, "step": 17580 }, { "epoch": 2.55, "grad_norm": 9.499101638793945, "learning_rate": 1.17612830448649e-07, "loss": 0.404, "step": 17581 }, { "epoch": 2.55, "grad_norm": 9.109655380249023, "learning_rate": 1.175383657413933e-07, "loss": 0.3578, "step": 17582 }, { "epoch": 2.55, "grad_norm": 8.827420234680176, "learning_rate": 1.174639231427803e-07, "loss": 0.3616, "step": 17583 }, { "epoch": 2.55, "grad_norm": 8.844500541687012, "learning_rate": 1.1738950265467462e-07, "loss": 0.3198, "step": 17584 }, { "epoch": 2.55, "grad_norm": 8.015532493591309, "learning_rate": 1.1731510427894076e-07, "loss": 0.333, "step": 17585 }, { "epoch": 2.55, "grad_norm": 8.929155349731445, "learning_rate": 1.1724072801744278e-07, "loss": 0.377, "step": 17586 }, { "epoch": 2.55, "grad_norm": 8.440967559814453, "learning_rate": 1.1716637387204409e-07, "loss": 0.3308, "step": 17587 }, { "epoch": 2.55, "grad_norm": 8.290626525878906, "learning_rate": 1.1709204184460742e-07, "loss": 0.3509, "step": 17588 }, { "epoch": 2.55, "grad_norm": 8.310821533203125, "learning_rate": 1.1701773193699505e-07, "loss": 0.3636, "step": 17589 }, { "epoch": 2.55, "grad_norm": 9.191650390625, "learning_rate": 1.1694344415106871e-07, "loss": 0.3227, "step": 17590 }, { "epoch": 2.55, "grad_norm": 9.096673965454102, "learning_rate": 1.1686917848868949e-07, "loss": 0.3937, "step": 17591 }, { "epoch": 2.55, "grad_norm": 8.87803840637207, "learning_rate": 1.1679493495171822e-07, "loss": 0.3506, "step": 17592 }, { "epoch": 2.55, "grad_norm": 8.094000816345215, "learning_rate": 1.1672071354201484e-07, "loss": 0.2981, "step": 17593 }, { "epoch": 2.55, "grad_norm": 8.956745147705078, "learning_rate": 1.1664651426143857e-07, "loss": 0.3682, "step": 17594 }, { "epoch": 2.55, "grad_norm": 8.307063102722168, "learning_rate": 1.1657233711184866e-07, "loss": 0.347, "step": 17595 }, { "epoch": 2.55, "grad_norm": 8.455394744873047, "learning_rate": 1.1649818209510343e-07, "loss": 0.3588, "step": 17596 }, { "epoch": 2.55, "grad_norm": 7.848355293273926, "learning_rate": 1.164240492130607e-07, "loss": 0.2703, "step": 17597 }, { "epoch": 2.55, "grad_norm": 7.985707759857178, "learning_rate": 1.1634993846757801e-07, "loss": 0.3153, "step": 17598 }, { "epoch": 2.55, "grad_norm": 8.832779884338379, "learning_rate": 1.1627584986051142e-07, "loss": 0.3477, "step": 17599 }, { "epoch": 2.55, "grad_norm": 9.687309265136719, "learning_rate": 1.16201783393718e-07, "loss": 0.306, "step": 17600 }, { "epoch": 2.55, "grad_norm": 11.3389310836792, "learning_rate": 1.1612773906905237e-07, "loss": 0.365, "step": 17601 }, { "epoch": 2.55, "grad_norm": 8.4660062789917, "learning_rate": 1.1605371688837051e-07, "loss": 0.3264, "step": 17602 }, { "epoch": 2.55, "grad_norm": 9.119050025939941, "learning_rate": 1.1597971685352614e-07, "loss": 0.3678, "step": 17603 }, { "epoch": 2.55, "grad_norm": 8.63818073272705, "learning_rate": 1.159057389663739e-07, "loss": 0.3426, "step": 17604 }, { "epoch": 2.55, "grad_norm": 7.92802095413208, "learning_rate": 1.1583178322876675e-07, "loss": 0.3081, "step": 17605 }, { "epoch": 2.55, "grad_norm": 9.048919677734375, "learning_rate": 1.1575784964255764e-07, "loss": 0.3538, "step": 17606 }, { "epoch": 2.55, "grad_norm": 8.061142921447754, "learning_rate": 1.1568393820959888e-07, "loss": 0.3282, "step": 17607 }, { "epoch": 2.55, "grad_norm": 8.175222396850586, "learning_rate": 1.1561004893174208e-07, "loss": 0.3291, "step": 17608 }, { "epoch": 2.55, "grad_norm": 8.742693901062012, "learning_rate": 1.1553618181083857e-07, "loss": 0.2919, "step": 17609 }, { "epoch": 2.56, "grad_norm": 7.677924156188965, "learning_rate": 1.1546233684873896e-07, "loss": 0.3179, "step": 17610 }, { "epoch": 2.56, "grad_norm": 9.657453536987305, "learning_rate": 1.1538851404729333e-07, "loss": 0.4014, "step": 17611 }, { "epoch": 2.56, "grad_norm": 8.68066692352295, "learning_rate": 1.1531471340835108e-07, "loss": 0.3369, "step": 17612 }, { "epoch": 2.56, "grad_norm": 9.758018493652344, "learning_rate": 1.1524093493376118e-07, "loss": 0.3642, "step": 17613 }, { "epoch": 2.56, "grad_norm": 8.957676887512207, "learning_rate": 1.1516717862537229e-07, "loss": 0.3396, "step": 17614 }, { "epoch": 2.56, "grad_norm": 8.647624969482422, "learning_rate": 1.1509344448503167e-07, "loss": 0.3859, "step": 17615 }, { "epoch": 2.56, "grad_norm": 10.48454761505127, "learning_rate": 1.150197325145874e-07, "loss": 0.3991, "step": 17616 }, { "epoch": 2.56, "grad_norm": 8.943137168884277, "learning_rate": 1.1494604271588526e-07, "loss": 0.3266, "step": 17617 }, { "epoch": 2.56, "grad_norm": 7.749242305755615, "learning_rate": 1.1487237509077252e-07, "loss": 0.3228, "step": 17618 }, { "epoch": 2.56, "grad_norm": 8.167848587036133, "learning_rate": 1.1479872964109394e-07, "loss": 0.3648, "step": 17619 }, { "epoch": 2.56, "grad_norm": 8.75880241394043, "learning_rate": 1.1472510636869493e-07, "loss": 0.3424, "step": 17620 }, { "epoch": 2.56, "grad_norm": 7.838990688323975, "learning_rate": 1.1465150527542e-07, "loss": 0.3441, "step": 17621 }, { "epoch": 2.56, "grad_norm": 9.936355590820312, "learning_rate": 1.1457792636311281e-07, "loss": 0.4362, "step": 17622 }, { "epoch": 2.56, "grad_norm": 9.841193199157715, "learning_rate": 1.1450436963361743e-07, "loss": 0.4432, "step": 17623 }, { "epoch": 2.56, "grad_norm": 8.4366455078125, "learning_rate": 1.1443083508877605e-07, "loss": 0.3443, "step": 17624 }, { "epoch": 2.56, "grad_norm": 8.668798446655273, "learning_rate": 1.143573227304312e-07, "loss": 0.3478, "step": 17625 }, { "epoch": 2.56, "grad_norm": 7.6915130615234375, "learning_rate": 1.1428383256042462e-07, "loss": 0.3215, "step": 17626 }, { "epoch": 2.56, "grad_norm": 8.229022026062012, "learning_rate": 1.1421036458059751e-07, "loss": 0.3445, "step": 17627 }, { "epoch": 2.56, "grad_norm": 9.069273948669434, "learning_rate": 1.1413691879279052e-07, "loss": 0.3616, "step": 17628 }, { "epoch": 2.56, "grad_norm": 7.885776996612549, "learning_rate": 1.140634951988435e-07, "loss": 0.3049, "step": 17629 }, { "epoch": 2.56, "grad_norm": 8.223776817321777, "learning_rate": 1.1399009380059622e-07, "loss": 0.3368, "step": 17630 }, { "epoch": 2.56, "grad_norm": 8.998361587524414, "learning_rate": 1.139167145998875e-07, "loss": 0.3277, "step": 17631 }, { "epoch": 2.56, "grad_norm": 9.630688667297363, "learning_rate": 1.1384335759855579e-07, "loss": 0.3319, "step": 17632 }, { "epoch": 2.56, "grad_norm": 8.91006088256836, "learning_rate": 1.1377002279843906e-07, "loss": 0.3864, "step": 17633 }, { "epoch": 2.56, "grad_norm": 7.514974594116211, "learning_rate": 1.1369671020137406e-07, "loss": 0.3161, "step": 17634 }, { "epoch": 2.56, "grad_norm": 8.900781631469727, "learning_rate": 1.136234198091982e-07, "loss": 0.4126, "step": 17635 }, { "epoch": 2.56, "grad_norm": 9.387382507324219, "learning_rate": 1.1355015162374704e-07, "loss": 0.3172, "step": 17636 }, { "epoch": 2.56, "grad_norm": 8.673027038574219, "learning_rate": 1.1347690564685696e-07, "loss": 0.3117, "step": 17637 }, { "epoch": 2.56, "grad_norm": 8.986570358276367, "learning_rate": 1.1340368188036209e-07, "loss": 0.336, "step": 17638 }, { "epoch": 2.56, "grad_norm": 7.159663200378418, "learning_rate": 1.1333048032609782e-07, "loss": 0.2965, "step": 17639 }, { "epoch": 2.56, "grad_norm": 8.163818359375, "learning_rate": 1.1325730098589747e-07, "loss": 0.3626, "step": 17640 }, { "epoch": 2.56, "grad_norm": 9.011411666870117, "learning_rate": 1.1318414386159459e-07, "loss": 0.3487, "step": 17641 }, { "epoch": 2.56, "grad_norm": 9.725617408752441, "learning_rate": 1.1311100895502212e-07, "loss": 0.3624, "step": 17642 }, { "epoch": 2.56, "grad_norm": 9.127577781677246, "learning_rate": 1.1303789626801219e-07, "loss": 0.4528, "step": 17643 }, { "epoch": 2.56, "grad_norm": 8.286279678344727, "learning_rate": 1.1296480580239676e-07, "loss": 0.3491, "step": 17644 }, { "epoch": 2.56, "grad_norm": 7.2219390869140625, "learning_rate": 1.1289173756000669e-07, "loss": 0.3013, "step": 17645 }, { "epoch": 2.56, "grad_norm": 10.660215377807617, "learning_rate": 1.1281869154267288e-07, "loss": 0.3999, "step": 17646 }, { "epoch": 2.56, "grad_norm": 9.568899154663086, "learning_rate": 1.1274566775222516e-07, "loss": 0.3805, "step": 17647 }, { "epoch": 2.56, "grad_norm": 8.293617248535156, "learning_rate": 1.1267266619049309e-07, "loss": 0.2953, "step": 17648 }, { "epoch": 2.56, "grad_norm": 7.613317966461182, "learning_rate": 1.1259968685930554e-07, "loss": 0.2939, "step": 17649 }, { "epoch": 2.56, "grad_norm": 8.243143081665039, "learning_rate": 1.1252672976049104e-07, "loss": 0.3426, "step": 17650 }, { "epoch": 2.56, "grad_norm": 8.394537925720215, "learning_rate": 1.1245379489587736e-07, "loss": 0.3642, "step": 17651 }, { "epoch": 2.56, "grad_norm": 9.317805290222168, "learning_rate": 1.123808822672917e-07, "loss": 0.3495, "step": 17652 }, { "epoch": 2.56, "grad_norm": 9.19863510131836, "learning_rate": 1.1230799187656082e-07, "loss": 0.4742, "step": 17653 }, { "epoch": 2.56, "grad_norm": 8.223978042602539, "learning_rate": 1.1223512372551103e-07, "loss": 0.3278, "step": 17654 }, { "epoch": 2.56, "grad_norm": 8.200065612792969, "learning_rate": 1.1216227781596743e-07, "loss": 0.3404, "step": 17655 }, { "epoch": 2.56, "grad_norm": 8.971847534179688, "learning_rate": 1.1208945414975567e-07, "loss": 0.3513, "step": 17656 }, { "epoch": 2.56, "grad_norm": 9.613984107971191, "learning_rate": 1.1201665272869965e-07, "loss": 0.3764, "step": 17657 }, { "epoch": 2.56, "grad_norm": 8.399333000183105, "learning_rate": 1.1194387355462388e-07, "loss": 0.3751, "step": 17658 }, { "epoch": 2.56, "grad_norm": 7.938575267791748, "learning_rate": 1.11871116629351e-07, "loss": 0.3212, "step": 17659 }, { "epoch": 2.56, "grad_norm": 8.634852409362793, "learning_rate": 1.117983819547047e-07, "loss": 0.4036, "step": 17660 }, { "epoch": 2.56, "grad_norm": 9.37128734588623, "learning_rate": 1.117256695325065e-07, "loss": 0.3231, "step": 17661 }, { "epoch": 2.56, "grad_norm": 8.860896110534668, "learning_rate": 1.116529793645784e-07, "loss": 0.3404, "step": 17662 }, { "epoch": 2.56, "grad_norm": 8.423288345336914, "learning_rate": 1.1158031145274149e-07, "loss": 0.3675, "step": 17663 }, { "epoch": 2.56, "grad_norm": 9.977849006652832, "learning_rate": 1.1150766579881643e-07, "loss": 0.4087, "step": 17664 }, { "epoch": 2.56, "grad_norm": 8.933090209960938, "learning_rate": 1.114350424046231e-07, "loss": 0.4275, "step": 17665 }, { "epoch": 2.56, "grad_norm": 8.33267593383789, "learning_rate": 1.1136244127198092e-07, "loss": 0.33, "step": 17666 }, { "epoch": 2.56, "grad_norm": 8.571205139160156, "learning_rate": 1.1128986240270899e-07, "loss": 0.3758, "step": 17667 }, { "epoch": 2.56, "grad_norm": 8.335051536560059, "learning_rate": 1.1121730579862553e-07, "loss": 0.3322, "step": 17668 }, { "epoch": 2.56, "grad_norm": 9.389998435974121, "learning_rate": 1.1114477146154833e-07, "loss": 0.3647, "step": 17669 }, { "epoch": 2.56, "grad_norm": 10.548348426818848, "learning_rate": 1.1107225939329479e-07, "loss": 0.4478, "step": 17670 }, { "epoch": 2.56, "grad_norm": 9.204488754272461, "learning_rate": 1.1099976959568113e-07, "loss": 0.3852, "step": 17671 }, { "epoch": 2.56, "grad_norm": 9.127914428710938, "learning_rate": 1.1092730207052414e-07, "loss": 0.3789, "step": 17672 }, { "epoch": 2.56, "grad_norm": 9.288895606994629, "learning_rate": 1.1085485681963858e-07, "loss": 0.3906, "step": 17673 }, { "epoch": 2.56, "grad_norm": 9.094752311706543, "learning_rate": 1.107824338448402e-07, "loss": 0.3873, "step": 17674 }, { "epoch": 2.56, "grad_norm": 8.738266944885254, "learning_rate": 1.1071003314794314e-07, "loss": 0.3489, "step": 17675 }, { "epoch": 2.56, "grad_norm": 8.978069305419922, "learning_rate": 1.1063765473076092e-07, "loss": 0.333, "step": 17676 }, { "epoch": 2.56, "grad_norm": 8.766332626342773, "learning_rate": 1.1056529859510766e-07, "loss": 0.3152, "step": 17677 }, { "epoch": 2.57, "grad_norm": 9.880898475646973, "learning_rate": 1.1049296474279523e-07, "loss": 0.3207, "step": 17678 }, { "epoch": 2.57, "grad_norm": 9.191927909851074, "learning_rate": 1.1042065317563664e-07, "loss": 0.3627, "step": 17679 }, { "epoch": 2.57, "grad_norm": 7.127645015716553, "learning_rate": 1.10348363895443e-07, "loss": 0.278, "step": 17680 }, { "epoch": 2.57, "grad_norm": 7.979477405548096, "learning_rate": 1.1027609690402551e-07, "loss": 0.3134, "step": 17681 }, { "epoch": 2.57, "grad_norm": 8.428478240966797, "learning_rate": 1.1020385220319483e-07, "loss": 0.3395, "step": 17682 }, { "epoch": 2.57, "grad_norm": 8.608853340148926, "learning_rate": 1.1013162979476088e-07, "loss": 0.3668, "step": 17683 }, { "epoch": 2.57, "grad_norm": 8.978005409240723, "learning_rate": 1.1005942968053294e-07, "loss": 0.3382, "step": 17684 }, { "epoch": 2.57, "grad_norm": 7.716447830200195, "learning_rate": 1.0998725186232016e-07, "loss": 0.2878, "step": 17685 }, { "epoch": 2.57, "grad_norm": 8.199993133544922, "learning_rate": 1.0991509634193053e-07, "loss": 0.2962, "step": 17686 }, { "epoch": 2.57, "grad_norm": 9.145771980285645, "learning_rate": 1.0984296312117203e-07, "loss": 0.344, "step": 17687 }, { "epoch": 2.57, "grad_norm": 9.282866477966309, "learning_rate": 1.0977085220185167e-07, "loss": 0.3785, "step": 17688 }, { "epoch": 2.57, "grad_norm": 7.395017623901367, "learning_rate": 1.0969876358577634e-07, "loss": 0.3112, "step": 17689 }, { "epoch": 2.57, "grad_norm": 9.195984840393066, "learning_rate": 1.0962669727475149e-07, "loss": 0.3629, "step": 17690 }, { "epoch": 2.57, "grad_norm": 10.794397354125977, "learning_rate": 1.0955465327058345e-07, "loss": 0.4572, "step": 17691 }, { "epoch": 2.57, "grad_norm": 9.744227409362793, "learning_rate": 1.0948263157507632e-07, "loss": 0.3466, "step": 17692 }, { "epoch": 2.57, "grad_norm": 8.932178497314453, "learning_rate": 1.0941063219003543e-07, "loss": 0.3923, "step": 17693 }, { "epoch": 2.57, "grad_norm": 9.525259017944336, "learning_rate": 1.0933865511726359e-07, "loss": 0.2998, "step": 17694 }, { "epoch": 2.57, "grad_norm": 8.125469207763672, "learning_rate": 1.09266700358565e-07, "loss": 0.3436, "step": 17695 }, { "epoch": 2.57, "grad_norm": 8.525450706481934, "learning_rate": 1.0919476791574178e-07, "loss": 0.3674, "step": 17696 }, { "epoch": 2.57, "grad_norm": 9.391229629516602, "learning_rate": 1.0912285779059616e-07, "loss": 0.3926, "step": 17697 }, { "epoch": 2.57, "grad_norm": 8.36520767211914, "learning_rate": 1.090509699849299e-07, "loss": 0.3852, "step": 17698 }, { "epoch": 2.57, "grad_norm": 8.047929763793945, "learning_rate": 1.0897910450054371e-07, "loss": 0.3032, "step": 17699 }, { "epoch": 2.57, "grad_norm": 7.261064529418945, "learning_rate": 1.0890726133923867e-07, "loss": 0.3149, "step": 17700 }, { "epoch": 2.57, "grad_norm": 7.392640113830566, "learning_rate": 1.0883544050281413e-07, "loss": 0.2971, "step": 17701 }, { "epoch": 2.57, "grad_norm": 8.227510452270508, "learning_rate": 1.0876364199306965e-07, "loss": 0.3456, "step": 17702 }, { "epoch": 2.57, "grad_norm": 9.362996101379395, "learning_rate": 1.08691865811804e-07, "loss": 0.3698, "step": 17703 }, { "epoch": 2.57, "grad_norm": 8.84476375579834, "learning_rate": 1.0862011196081555e-07, "loss": 0.337, "step": 17704 }, { "epoch": 2.57, "grad_norm": 10.235804557800293, "learning_rate": 1.0854838044190174e-07, "loss": 0.3766, "step": 17705 }, { "epoch": 2.57, "grad_norm": 8.142683982849121, "learning_rate": 1.0847667125685977e-07, "loss": 0.3432, "step": 17706 }, { "epoch": 2.57, "grad_norm": 9.085365295410156, "learning_rate": 1.0840498440748635e-07, "loss": 0.3665, "step": 17707 }, { "epoch": 2.57, "grad_norm": 7.767769813537598, "learning_rate": 1.0833331989557725e-07, "loss": 0.3343, "step": 17708 }, { "epoch": 2.57, "grad_norm": 9.645241737365723, "learning_rate": 1.0826167772292805e-07, "loss": 0.3675, "step": 17709 }, { "epoch": 2.57, "grad_norm": 8.158414840698242, "learning_rate": 1.0819005789133384e-07, "loss": 0.3256, "step": 17710 }, { "epoch": 2.57, "grad_norm": 9.120011329650879, "learning_rate": 1.0811846040258821e-07, "loss": 0.3764, "step": 17711 }, { "epoch": 2.57, "grad_norm": 8.768682479858398, "learning_rate": 1.0804688525848582e-07, "loss": 0.3396, "step": 17712 }, { "epoch": 2.57, "grad_norm": 8.717267990112305, "learning_rate": 1.0797533246081902e-07, "loss": 0.3285, "step": 17713 }, { "epoch": 2.57, "grad_norm": 7.75367546081543, "learning_rate": 1.0790380201138138e-07, "loss": 0.3447, "step": 17714 }, { "epoch": 2.57, "grad_norm": 9.516741752624512, "learning_rate": 1.0783229391196414e-07, "loss": 0.4117, "step": 17715 }, { "epoch": 2.57, "grad_norm": 9.633177757263184, "learning_rate": 1.0776080816435928e-07, "loss": 0.3749, "step": 17716 }, { "epoch": 2.57, "grad_norm": 7.997807502746582, "learning_rate": 1.0768934477035763e-07, "loss": 0.3309, "step": 17717 }, { "epoch": 2.57, "grad_norm": 8.203964233398438, "learning_rate": 1.0761790373174951e-07, "loss": 0.3398, "step": 17718 }, { "epoch": 2.57, "grad_norm": 8.568429946899414, "learning_rate": 1.0754648505032482e-07, "loss": 0.3472, "step": 17719 }, { "epoch": 2.57, "grad_norm": 9.621604919433594, "learning_rate": 1.0747508872787292e-07, "loss": 0.3471, "step": 17720 }, { "epoch": 2.57, "grad_norm": 8.215215682983398, "learning_rate": 1.0740371476618248e-07, "loss": 0.3323, "step": 17721 }, { "epoch": 2.57, "grad_norm": 8.996272087097168, "learning_rate": 1.0733236316704174e-07, "loss": 0.3561, "step": 17722 }, { "epoch": 2.57, "grad_norm": 8.83784008026123, "learning_rate": 1.0726103393223807e-07, "loss": 0.3326, "step": 17723 }, { "epoch": 2.57, "grad_norm": 9.1011381149292, "learning_rate": 1.0718972706355889e-07, "loss": 0.3901, "step": 17724 }, { "epoch": 2.57, "grad_norm": 7.939597129821777, "learning_rate": 1.0711844256279001e-07, "loss": 0.3472, "step": 17725 }, { "epoch": 2.57, "grad_norm": 8.243573188781738, "learning_rate": 1.0704718043171801e-07, "loss": 0.3638, "step": 17726 }, { "epoch": 2.57, "grad_norm": 8.828036308288574, "learning_rate": 1.06975940672128e-07, "loss": 0.342, "step": 17727 }, { "epoch": 2.57, "grad_norm": 8.227916717529297, "learning_rate": 1.069047232858048e-07, "loss": 0.3336, "step": 17728 }, { "epoch": 2.57, "grad_norm": 8.084320068359375, "learning_rate": 1.0683352827453263e-07, "loss": 0.288, "step": 17729 }, { "epoch": 2.57, "grad_norm": 8.21574592590332, "learning_rate": 1.0676235564009517e-07, "loss": 0.3384, "step": 17730 }, { "epoch": 2.57, "grad_norm": 8.45140266418457, "learning_rate": 1.0669120538427568e-07, "loss": 0.3516, "step": 17731 }, { "epoch": 2.57, "grad_norm": 8.359325408935547, "learning_rate": 1.0662007750885615e-07, "loss": 0.3349, "step": 17732 }, { "epoch": 2.57, "grad_norm": 8.697510719299316, "learning_rate": 1.0654897201561953e-07, "loss": 0.3504, "step": 17733 }, { "epoch": 2.57, "grad_norm": 7.06807804107666, "learning_rate": 1.0647788890634612e-07, "loss": 0.2974, "step": 17734 }, { "epoch": 2.57, "grad_norm": 8.218518257141113, "learning_rate": 1.0640682818281788e-07, "loss": 0.3553, "step": 17735 }, { "epoch": 2.57, "grad_norm": 9.079551696777344, "learning_rate": 1.0633578984681446e-07, "loss": 0.3506, "step": 17736 }, { "epoch": 2.57, "grad_norm": 7.692206382751465, "learning_rate": 1.0626477390011568e-07, "loss": 0.3559, "step": 17737 }, { "epoch": 2.57, "grad_norm": 9.903674125671387, "learning_rate": 1.0619378034450089e-07, "loss": 0.3708, "step": 17738 }, { "epoch": 2.57, "grad_norm": 8.00908088684082, "learning_rate": 1.0612280918174865e-07, "loss": 0.3688, "step": 17739 }, { "epoch": 2.57, "grad_norm": 7.991623401641846, "learning_rate": 1.0605186041363701e-07, "loss": 0.3831, "step": 17740 }, { "epoch": 2.57, "grad_norm": 8.700238227844238, "learning_rate": 1.0598093404194352e-07, "loss": 0.4098, "step": 17741 }, { "epoch": 2.57, "grad_norm": 8.588964462280273, "learning_rate": 1.0591003006844512e-07, "loss": 0.3836, "step": 17742 }, { "epoch": 2.57, "grad_norm": 7.441648483276367, "learning_rate": 1.0583914849491804e-07, "loss": 0.3489, "step": 17743 }, { "epoch": 2.57, "grad_norm": 8.034340858459473, "learning_rate": 1.0576828932313841e-07, "loss": 0.3403, "step": 17744 }, { "epoch": 2.57, "grad_norm": 9.470819473266602, "learning_rate": 1.0569745255488138e-07, "loss": 0.3888, "step": 17745 }, { "epoch": 2.57, "grad_norm": 8.968663215637207, "learning_rate": 1.056266381919213e-07, "loss": 0.3248, "step": 17746 }, { "epoch": 2.58, "grad_norm": 7.892370223999023, "learning_rate": 1.0555584623603286e-07, "loss": 0.3409, "step": 17747 }, { "epoch": 2.58, "grad_norm": 9.78102970123291, "learning_rate": 1.054850766889891e-07, "loss": 0.315, "step": 17748 }, { "epoch": 2.58, "grad_norm": 8.997795104980469, "learning_rate": 1.0541432955256369e-07, "loss": 0.3817, "step": 17749 }, { "epoch": 2.58, "grad_norm": 8.11807632446289, "learning_rate": 1.0534360482852822e-07, "loss": 0.3561, "step": 17750 }, { "epoch": 2.58, "grad_norm": 8.40567684173584, "learning_rate": 1.052729025186555e-07, "loss": 0.324, "step": 17751 }, { "epoch": 2.58, "grad_norm": 8.775101661682129, "learning_rate": 1.0520222262471634e-07, "loss": 0.3448, "step": 17752 }, { "epoch": 2.58, "grad_norm": 8.842855453491211, "learning_rate": 1.0513156514848132e-07, "loss": 0.3523, "step": 17753 }, { "epoch": 2.58, "grad_norm": 9.467940330505371, "learning_rate": 1.0506093009172123e-07, "loss": 0.3907, "step": 17754 }, { "epoch": 2.58, "grad_norm": 9.027252197265625, "learning_rate": 1.0499031745620513e-07, "loss": 0.3759, "step": 17755 }, { "epoch": 2.58, "grad_norm": 8.433304786682129, "learning_rate": 1.0491972724370279e-07, "loss": 0.3886, "step": 17756 }, { "epoch": 2.58, "grad_norm": 8.494118690490723, "learning_rate": 1.0484915945598205e-07, "loss": 0.3383, "step": 17757 }, { "epoch": 2.58, "grad_norm": 8.707456588745117, "learning_rate": 1.0477861409481126e-07, "loss": 0.3772, "step": 17758 }, { "epoch": 2.58, "grad_norm": 7.733091831207275, "learning_rate": 1.0470809116195766e-07, "loss": 0.3551, "step": 17759 }, { "epoch": 2.58, "grad_norm": 7.719959259033203, "learning_rate": 1.0463759065918809e-07, "loss": 0.3412, "step": 17760 }, { "epoch": 2.58, "grad_norm": 8.69337272644043, "learning_rate": 1.04567112588269e-07, "loss": 0.3383, "step": 17761 }, { "epoch": 2.58, "grad_norm": 10.576536178588867, "learning_rate": 1.0449665695096588e-07, "loss": 0.3495, "step": 17762 }, { "epoch": 2.58, "grad_norm": 8.479898452758789, "learning_rate": 1.0442622374904397e-07, "loss": 0.2963, "step": 17763 }, { "epoch": 2.58, "grad_norm": 8.142387390136719, "learning_rate": 1.0435581298426788e-07, "loss": 0.3423, "step": 17764 }, { "epoch": 2.58, "grad_norm": 10.16531753540039, "learning_rate": 1.0428542465840162e-07, "loss": 0.3463, "step": 17765 }, { "epoch": 2.58, "grad_norm": 9.076942443847656, "learning_rate": 1.0421505877320891e-07, "loss": 0.3437, "step": 17766 }, { "epoch": 2.58, "grad_norm": 8.42995548248291, "learning_rate": 1.0414471533045199e-07, "loss": 0.3297, "step": 17767 }, { "epoch": 2.58, "grad_norm": 9.351052284240723, "learning_rate": 1.0407439433189403e-07, "loss": 0.3624, "step": 17768 }, { "epoch": 2.58, "grad_norm": 8.583978652954102, "learning_rate": 1.0400409577929592e-07, "loss": 0.3367, "step": 17769 }, { "epoch": 2.58, "grad_norm": 8.314520835876465, "learning_rate": 1.0393381967441984e-07, "loss": 0.3508, "step": 17770 }, { "epoch": 2.58, "grad_norm": 9.375153541564941, "learning_rate": 1.0386356601902568e-07, "loss": 0.3769, "step": 17771 }, { "epoch": 2.58, "grad_norm": 9.737565040588379, "learning_rate": 1.0379333481487385e-07, "loss": 0.3879, "step": 17772 }, { "epoch": 2.58, "grad_norm": 10.174700736999512, "learning_rate": 1.037231260637239e-07, "loss": 0.3365, "step": 17773 }, { "epoch": 2.58, "grad_norm": 7.847480297088623, "learning_rate": 1.0365293976733458e-07, "loss": 0.3472, "step": 17774 }, { "epoch": 2.58, "grad_norm": 8.308415412902832, "learning_rate": 1.0358277592746456e-07, "loss": 0.3729, "step": 17775 }, { "epoch": 2.58, "grad_norm": 7.860849857330322, "learning_rate": 1.0351263454587155e-07, "loss": 0.3558, "step": 17776 }, { "epoch": 2.58, "grad_norm": 9.92128849029541, "learning_rate": 1.0344251562431283e-07, "loss": 0.4316, "step": 17777 }, { "epoch": 2.58, "grad_norm": 10.073431015014648, "learning_rate": 1.0337241916454509e-07, "loss": 0.3969, "step": 17778 }, { "epoch": 2.58, "grad_norm": 9.447789192199707, "learning_rate": 1.0330234516832448e-07, "loss": 0.3933, "step": 17779 }, { "epoch": 2.58, "grad_norm": 8.524462699890137, "learning_rate": 1.032322936374067e-07, "loss": 0.3642, "step": 17780 }, { "epoch": 2.58, "grad_norm": 8.927704811096191, "learning_rate": 1.0316226457354671e-07, "loss": 0.373, "step": 17781 }, { "epoch": 2.58, "grad_norm": 8.599501609802246, "learning_rate": 1.0309225797849896e-07, "loss": 0.323, "step": 17782 }, { "epoch": 2.58, "grad_norm": 8.519513130187988, "learning_rate": 1.030222738540174e-07, "loss": 0.3467, "step": 17783 }, { "epoch": 2.58, "grad_norm": 8.06091594696045, "learning_rate": 1.029523122018553e-07, "loss": 0.3196, "step": 17784 }, { "epoch": 2.58, "grad_norm": 9.914785385131836, "learning_rate": 1.0288237302376545e-07, "loss": 0.3945, "step": 17785 }, { "epoch": 2.58, "grad_norm": 8.378557205200195, "learning_rate": 1.0281245632150015e-07, "loss": 0.352, "step": 17786 }, { "epoch": 2.58, "grad_norm": 8.576512336730957, "learning_rate": 1.027425620968111e-07, "loss": 0.356, "step": 17787 }, { "epoch": 2.58, "grad_norm": 6.975689888000488, "learning_rate": 1.0267269035144888e-07, "loss": 0.3174, "step": 17788 }, { "epoch": 2.58, "grad_norm": 7.561525821685791, "learning_rate": 1.0260284108716488e-07, "loss": 0.3092, "step": 17789 }, { "epoch": 2.58, "grad_norm": 8.579115867614746, "learning_rate": 1.0253301430570816e-07, "loss": 0.3345, "step": 17790 }, { "epoch": 2.58, "grad_norm": 8.375221252441406, "learning_rate": 1.0246321000882885e-07, "loss": 0.3444, "step": 17791 }, { "epoch": 2.58, "grad_norm": 9.292620658874512, "learning_rate": 1.0239342819827534e-07, "loss": 0.3627, "step": 17792 }, { "epoch": 2.58, "grad_norm": 7.881742000579834, "learning_rate": 1.0232366887579603e-07, "loss": 0.3433, "step": 17793 }, { "epoch": 2.58, "grad_norm": 10.178893089294434, "learning_rate": 1.022539320431386e-07, "loss": 0.4401, "step": 17794 }, { "epoch": 2.58, "grad_norm": 8.196283340454102, "learning_rate": 1.0218421770205021e-07, "loss": 0.2948, "step": 17795 }, { "epoch": 2.58, "grad_norm": 7.882575035095215, "learning_rate": 1.0211452585427738e-07, "loss": 0.3019, "step": 17796 }, { "epoch": 2.58, "grad_norm": 8.026294708251953, "learning_rate": 1.0204485650156636e-07, "loss": 0.2832, "step": 17797 }, { "epoch": 2.58, "grad_norm": 9.80575180053711, "learning_rate": 1.0197520964566231e-07, "loss": 0.394, "step": 17798 }, { "epoch": 2.58, "grad_norm": 8.602265357971191, "learning_rate": 1.0190558528831028e-07, "loss": 0.3521, "step": 17799 }, { "epoch": 2.58, "grad_norm": 7.76254415512085, "learning_rate": 1.0183598343125455e-07, "loss": 0.377, "step": 17800 }, { "epoch": 2.58, "grad_norm": 9.837343215942383, "learning_rate": 1.0176640407623916e-07, "loss": 0.432, "step": 17801 }, { "epoch": 2.58, "grad_norm": 9.936574935913086, "learning_rate": 1.0169684722500648e-07, "loss": 0.4137, "step": 17802 }, { "epoch": 2.58, "grad_norm": 9.762712478637695, "learning_rate": 1.0162731287930026e-07, "loss": 0.3779, "step": 17803 }, { "epoch": 2.58, "grad_norm": 8.82655143737793, "learning_rate": 1.0155780104086154e-07, "loss": 0.3614, "step": 17804 }, { "epoch": 2.58, "grad_norm": 16.09163475036621, "learning_rate": 1.0148831171143257e-07, "loss": 0.4733, "step": 17805 }, { "epoch": 2.58, "grad_norm": 9.197599411010742, "learning_rate": 1.014188448927542e-07, "loss": 0.3787, "step": 17806 }, { "epoch": 2.58, "grad_norm": 8.688812255859375, "learning_rate": 1.0134940058656616e-07, "loss": 0.3653, "step": 17807 }, { "epoch": 2.58, "grad_norm": 9.256101608276367, "learning_rate": 1.0127997879460914e-07, "loss": 0.351, "step": 17808 }, { "epoch": 2.58, "grad_norm": 8.8729248046875, "learning_rate": 1.0121057951862166e-07, "loss": 0.3509, "step": 17809 }, { "epoch": 2.58, "grad_norm": 10.04000186920166, "learning_rate": 1.011412027603431e-07, "loss": 0.3972, "step": 17810 }, { "epoch": 2.58, "grad_norm": 10.3688383102417, "learning_rate": 1.0107184852151085e-07, "loss": 0.4086, "step": 17811 }, { "epoch": 2.58, "grad_norm": 9.01170825958252, "learning_rate": 1.0100251680386318e-07, "loss": 0.2624, "step": 17812 }, { "epoch": 2.58, "grad_norm": 8.115520477294922, "learning_rate": 1.0093320760913648e-07, "loss": 0.3646, "step": 17813 }, { "epoch": 2.58, "grad_norm": 9.355072975158691, "learning_rate": 1.0086392093906759e-07, "loss": 0.4399, "step": 17814 }, { "epoch": 2.58, "grad_norm": 8.217541694641113, "learning_rate": 1.0079465679539212e-07, "loss": 0.3118, "step": 17815 }, { "epoch": 2.59, "grad_norm": 7.494823932647705, "learning_rate": 1.0072541517984546e-07, "loss": 0.2933, "step": 17816 }, { "epoch": 2.59, "grad_norm": 7.816226005554199, "learning_rate": 1.0065619609416232e-07, "loss": 0.2963, "step": 17817 }, { "epoch": 2.59, "grad_norm": 9.023796081542969, "learning_rate": 1.0058699954007699e-07, "loss": 0.3361, "step": 17818 }, { "epoch": 2.59, "grad_norm": 7.9423017501831055, "learning_rate": 1.0051782551932297e-07, "loss": 0.3368, "step": 17819 }, { "epoch": 2.59, "grad_norm": 8.489139556884766, "learning_rate": 1.0044867403363333e-07, "loss": 0.3705, "step": 17820 }, { "epoch": 2.59, "grad_norm": 8.77590274810791, "learning_rate": 1.0037954508474045e-07, "loss": 0.3337, "step": 17821 }, { "epoch": 2.59, "grad_norm": 8.596574783325195, "learning_rate": 1.0031043867437661e-07, "loss": 0.3164, "step": 17822 }, { "epoch": 2.59, "grad_norm": 8.022562980651855, "learning_rate": 1.0024135480427253e-07, "loss": 0.3409, "step": 17823 }, { "epoch": 2.59, "grad_norm": 8.655746459960938, "learning_rate": 1.0017229347615974e-07, "loss": 0.3388, "step": 17824 }, { "epoch": 2.59, "grad_norm": 8.210434913635254, "learning_rate": 1.0010325469176761e-07, "loss": 0.3306, "step": 17825 }, { "epoch": 2.59, "grad_norm": 8.85877513885498, "learning_rate": 1.0003423845282678e-07, "loss": 0.3542, "step": 17826 }, { "epoch": 2.59, "grad_norm": 9.579176902770996, "learning_rate": 9.996524476106549e-08, "loss": 0.3916, "step": 17827 }, { "epoch": 2.59, "grad_norm": 9.387337684631348, "learning_rate": 9.989627361821263e-08, "loss": 0.3948, "step": 17828 }, { "epoch": 2.59, "grad_norm": 8.024436950683594, "learning_rate": 9.982732502599611e-08, "loss": 0.3393, "step": 17829 }, { "epoch": 2.59, "grad_norm": 8.503774642944336, "learning_rate": 9.975839898614313e-08, "loss": 0.3576, "step": 17830 }, { "epoch": 2.59, "grad_norm": 7.752019882202148, "learning_rate": 9.968949550038119e-08, "loss": 0.3241, "step": 17831 }, { "epoch": 2.59, "grad_norm": 9.6466064453125, "learning_rate": 9.962061457043557e-08, "loss": 0.3853, "step": 17832 }, { "epoch": 2.59, "grad_norm": 8.167845726013184, "learning_rate": 9.955175619803303e-08, "loss": 0.344, "step": 17833 }, { "epoch": 2.59, "grad_norm": 8.377924919128418, "learning_rate": 9.948292038489781e-08, "loss": 0.3431, "step": 17834 }, { "epoch": 2.59, "grad_norm": 9.271124839782715, "learning_rate": 9.9414107132755e-08, "loss": 0.3885, "step": 17835 }, { "epoch": 2.59, "grad_norm": 11.98839282989502, "learning_rate": 9.934531644332832e-08, "loss": 0.4265, "step": 17836 }, { "epoch": 2.59, "grad_norm": 9.626975059509277, "learning_rate": 9.927654831834132e-08, "loss": 0.3766, "step": 17837 }, { "epoch": 2.59, "grad_norm": 9.185108184814453, "learning_rate": 9.920780275951701e-08, "loss": 0.4002, "step": 17838 }, { "epoch": 2.59, "grad_norm": 8.60136604309082, "learning_rate": 9.913907976857739e-08, "loss": 0.3095, "step": 17839 }, { "epoch": 2.59, "grad_norm": 7.736815452575684, "learning_rate": 9.907037934724449e-08, "loss": 0.3323, "step": 17840 }, { "epoch": 2.59, "grad_norm": 8.423054695129395, "learning_rate": 9.90017014972394e-08, "loss": 0.3582, "step": 17841 }, { "epoch": 2.59, "grad_norm": 9.163891792297363, "learning_rate": 9.893304622028264e-08, "loss": 0.357, "step": 17842 }, { "epoch": 2.59, "grad_norm": 8.364550590515137, "learning_rate": 9.88644135180946e-08, "loss": 0.3836, "step": 17843 }, { "epoch": 2.59, "grad_norm": 8.026780128479004, "learning_rate": 9.879580339239401e-08, "loss": 0.3388, "step": 17844 }, { "epoch": 2.59, "grad_norm": 8.501372337341309, "learning_rate": 9.872721584490073e-08, "loss": 0.3075, "step": 17845 }, { "epoch": 2.59, "grad_norm": 8.494614601135254, "learning_rate": 9.865865087733216e-08, "loss": 0.3406, "step": 17846 }, { "epoch": 2.59, "grad_norm": 9.481648445129395, "learning_rate": 9.859010849140703e-08, "loss": 0.4023, "step": 17847 }, { "epoch": 2.59, "grad_norm": 8.7747163772583, "learning_rate": 9.852158868884186e-08, "loss": 0.3767, "step": 17848 }, { "epoch": 2.59, "grad_norm": 10.859578132629395, "learning_rate": 9.845309147135361e-08, "loss": 0.4796, "step": 17849 }, { "epoch": 2.59, "grad_norm": 7.9730658531188965, "learning_rate": 9.838461684065824e-08, "loss": 0.3647, "step": 17850 }, { "epoch": 2.59, "grad_norm": 8.897038459777832, "learning_rate": 9.831616479847127e-08, "loss": 0.348, "step": 17851 }, { "epoch": 2.59, "grad_norm": 8.840119361877441, "learning_rate": 9.824773534650777e-08, "loss": 0.305, "step": 17852 }, { "epoch": 2.59, "grad_norm": 8.940709114074707, "learning_rate": 9.817932848648202e-08, "loss": 0.3833, "step": 17853 }, { "epoch": 2.59, "grad_norm": 7.629087448120117, "learning_rate": 9.811094422010802e-08, "loss": 0.3465, "step": 17854 }, { "epoch": 2.59, "grad_norm": 8.039570808410645, "learning_rate": 9.80425825490988e-08, "loss": 0.3178, "step": 17855 }, { "epoch": 2.59, "grad_norm": 7.8595781326293945, "learning_rate": 9.797424347516714e-08, "loss": 0.3013, "step": 17856 }, { "epoch": 2.59, "grad_norm": 7.674307346343994, "learning_rate": 9.79059270000252e-08, "loss": 0.3345, "step": 17857 }, { "epoch": 2.59, "grad_norm": 8.261363983154297, "learning_rate": 9.783763312538451e-08, "loss": 0.3204, "step": 17858 }, { "epoch": 2.59, "grad_norm": 7.941915512084961, "learning_rate": 9.776936185295603e-08, "loss": 0.3507, "step": 17859 }, { "epoch": 2.59, "grad_norm": 8.296972274780273, "learning_rate": 9.770111318445018e-08, "loss": 0.3112, "step": 17860 }, { "epoch": 2.59, "grad_norm": 9.107335090637207, "learning_rate": 9.763288712157702e-08, "loss": 0.3879, "step": 17861 }, { "epoch": 2.59, "grad_norm": 8.425495147705078, "learning_rate": 9.756468366604575e-08, "loss": 0.3977, "step": 17862 }, { "epoch": 2.59, "grad_norm": 8.196249008178711, "learning_rate": 9.749650281956467e-08, "loss": 0.3174, "step": 17863 }, { "epoch": 2.59, "grad_norm": 8.52088451385498, "learning_rate": 9.742834458384264e-08, "loss": 0.2918, "step": 17864 }, { "epoch": 2.59, "grad_norm": 7.796031951904297, "learning_rate": 9.736020896058661e-08, "loss": 0.3324, "step": 17865 }, { "epoch": 2.59, "grad_norm": 9.169570922851562, "learning_rate": 9.729209595150423e-08, "loss": 0.3352, "step": 17866 }, { "epoch": 2.59, "grad_norm": 7.981697082519531, "learning_rate": 9.722400555830124e-08, "loss": 0.3482, "step": 17867 }, { "epoch": 2.59, "grad_norm": 9.995918273925781, "learning_rate": 9.715593778268438e-08, "loss": 0.3269, "step": 17868 }, { "epoch": 2.59, "grad_norm": 9.464726448059082, "learning_rate": 9.70878926263583e-08, "loss": 0.3602, "step": 17869 }, { "epoch": 2.59, "grad_norm": 8.253803253173828, "learning_rate": 9.701987009102797e-08, "loss": 0.3571, "step": 17870 }, { "epoch": 2.59, "grad_norm": 10.387018203735352, "learning_rate": 9.695187017839767e-08, "loss": 0.3915, "step": 17871 }, { "epoch": 2.59, "grad_norm": 10.278680801391602, "learning_rate": 9.688389289017096e-08, "loss": 0.3724, "step": 17872 }, { "epoch": 2.59, "grad_norm": 10.419835090637207, "learning_rate": 9.681593822805078e-08, "loss": 0.3768, "step": 17873 }, { "epoch": 2.59, "grad_norm": 9.534303665161133, "learning_rate": 9.674800619373991e-08, "loss": 0.3585, "step": 17874 }, { "epoch": 2.59, "grad_norm": 8.975058555603027, "learning_rate": 9.668009678894007e-08, "loss": 0.3485, "step": 17875 }, { "epoch": 2.59, "grad_norm": 9.20598030090332, "learning_rate": 9.661221001535258e-08, "loss": 0.3513, "step": 17876 }, { "epoch": 2.59, "grad_norm": 9.060134887695312, "learning_rate": 9.654434587467841e-08, "loss": 0.3325, "step": 17877 }, { "epoch": 2.59, "grad_norm": 8.675638198852539, "learning_rate": 9.647650436861787e-08, "loss": 0.3855, "step": 17878 }, { "epoch": 2.59, "grad_norm": 7.833183288574219, "learning_rate": 9.640868549887004e-08, "loss": 0.3633, "step": 17879 }, { "epoch": 2.59, "grad_norm": 8.88469123840332, "learning_rate": 9.63408892671348e-08, "loss": 0.3594, "step": 17880 }, { "epoch": 2.59, "grad_norm": 10.983251571655273, "learning_rate": 9.627311567510987e-08, "loss": 0.4506, "step": 17881 }, { "epoch": 2.59, "grad_norm": 8.62020206451416, "learning_rate": 9.620536472449393e-08, "loss": 0.3518, "step": 17882 }, { "epoch": 2.59, "grad_norm": 7.982848644256592, "learning_rate": 9.613763641698414e-08, "loss": 0.3226, "step": 17883 }, { "epoch": 2.59, "grad_norm": 7.929888725280762, "learning_rate": 9.606993075427683e-08, "loss": 0.3166, "step": 17884 }, { "epoch": 2.6, "grad_norm": 8.875078201293945, "learning_rate": 9.60022477380692e-08, "loss": 0.3668, "step": 17885 }, { "epoch": 2.6, "grad_norm": 7.068477153778076, "learning_rate": 9.593458737005589e-08, "loss": 0.2842, "step": 17886 }, { "epoch": 2.6, "grad_norm": 9.456075668334961, "learning_rate": 9.586694965193309e-08, "loss": 0.3666, "step": 17887 }, { "epoch": 2.6, "grad_norm": 9.587965965270996, "learning_rate": 9.579933458539424e-08, "loss": 0.3956, "step": 17888 }, { "epoch": 2.6, "grad_norm": 8.143336296081543, "learning_rate": 9.573174217213442e-08, "loss": 0.3076, "step": 17889 }, { "epoch": 2.6, "grad_norm": 10.284316062927246, "learning_rate": 9.566417241384627e-08, "loss": 0.3445, "step": 17890 }, { "epoch": 2.6, "grad_norm": 7.883125305175781, "learning_rate": 9.559662531222279e-08, "loss": 0.2929, "step": 17891 }, { "epoch": 2.6, "grad_norm": 8.050247192382812, "learning_rate": 9.552910086895649e-08, "loss": 0.3469, "step": 17892 }, { "epoch": 2.6, "grad_norm": 11.288122177124023, "learning_rate": 9.546159908573892e-08, "loss": 0.4062, "step": 17893 }, { "epoch": 2.6, "grad_norm": 9.056783676147461, "learning_rate": 9.539411996426128e-08, "loss": 0.3394, "step": 17894 }, { "epoch": 2.6, "grad_norm": 9.486794471740723, "learning_rate": 9.53266635062141e-08, "loss": 0.3599, "step": 17895 }, { "epoch": 2.6, "grad_norm": 8.363452911376953, "learning_rate": 9.525922971328748e-08, "loss": 0.3146, "step": 17896 }, { "epoch": 2.6, "grad_norm": 9.054849624633789, "learning_rate": 9.519181858717074e-08, "loss": 0.3669, "step": 17897 }, { "epoch": 2.6, "grad_norm": 8.407047271728516, "learning_rate": 9.512443012955285e-08, "loss": 0.3125, "step": 17898 }, { "epoch": 2.6, "grad_norm": 9.912233352661133, "learning_rate": 9.505706434212235e-08, "loss": 0.3845, "step": 17899 }, { "epoch": 2.6, "grad_norm": 8.654590606689453, "learning_rate": 9.498972122656624e-08, "loss": 0.2856, "step": 17900 }, { "epoch": 2.6, "grad_norm": 7.766305923461914, "learning_rate": 9.49224007845727e-08, "loss": 0.323, "step": 17901 }, { "epoch": 2.6, "grad_norm": 9.697940826416016, "learning_rate": 9.485510301782729e-08, "loss": 0.4008, "step": 17902 }, { "epoch": 2.6, "grad_norm": 8.153613090515137, "learning_rate": 9.478782792801698e-08, "loss": 0.3657, "step": 17903 }, { "epoch": 2.6, "grad_norm": 9.152310371398926, "learning_rate": 9.472057551682667e-08, "loss": 0.3495, "step": 17904 }, { "epoch": 2.6, "grad_norm": 11.266613006591797, "learning_rate": 9.465334578594141e-08, "loss": 0.4047, "step": 17905 }, { "epoch": 2.6, "grad_norm": 9.24378490447998, "learning_rate": 9.458613873704557e-08, "loss": 0.3704, "step": 17906 }, { "epoch": 2.6, "grad_norm": 8.867186546325684, "learning_rate": 9.451895437182277e-08, "loss": 0.3787, "step": 17907 }, { "epoch": 2.6, "grad_norm": 9.267152786254883, "learning_rate": 9.445179269195647e-08, "loss": 0.3538, "step": 17908 }, { "epoch": 2.6, "grad_norm": 8.845565795898438, "learning_rate": 9.438465369912906e-08, "loss": 0.3086, "step": 17909 }, { "epoch": 2.6, "grad_norm": 10.257640838623047, "learning_rate": 9.431753739502268e-08, "loss": 0.3575, "step": 17910 }, { "epoch": 2.6, "grad_norm": 9.154312133789062, "learning_rate": 9.425044378131875e-08, "loss": 0.4155, "step": 17911 }, { "epoch": 2.6, "grad_norm": 9.46157455444336, "learning_rate": 9.418337285969836e-08, "loss": 0.3214, "step": 17912 }, { "epoch": 2.6, "grad_norm": 8.937460899353027, "learning_rate": 9.411632463184172e-08, "loss": 0.4124, "step": 17913 }, { "epoch": 2.6, "grad_norm": 8.61563777923584, "learning_rate": 9.404929909942871e-08, "loss": 0.3751, "step": 17914 }, { "epoch": 2.6, "grad_norm": 8.698432922363281, "learning_rate": 9.398229626413845e-08, "loss": 0.3362, "step": 17915 }, { "epoch": 2.6, "grad_norm": 8.108368873596191, "learning_rate": 9.391531612764969e-08, "loss": 0.3606, "step": 17916 }, { "epoch": 2.6, "grad_norm": 9.293045043945312, "learning_rate": 9.384835869164043e-08, "loss": 0.3549, "step": 17917 }, { "epoch": 2.6, "grad_norm": 10.483453750610352, "learning_rate": 9.378142395778833e-08, "loss": 0.3924, "step": 17918 }, { "epoch": 2.6, "grad_norm": 9.336865425109863, "learning_rate": 9.37145119277699e-08, "loss": 0.3526, "step": 17919 }, { "epoch": 2.6, "grad_norm": 8.354684829711914, "learning_rate": 9.36476226032622e-08, "loss": 0.3659, "step": 17920 }, { "epoch": 2.6, "grad_norm": 9.519580841064453, "learning_rate": 9.358075598594018e-08, "loss": 0.3686, "step": 17921 }, { "epoch": 2.6, "grad_norm": 9.590657234191895, "learning_rate": 9.351391207747994e-08, "loss": 0.3557, "step": 17922 }, { "epoch": 2.6, "grad_norm": 9.044931411743164, "learning_rate": 9.344709087955538e-08, "loss": 0.3339, "step": 17923 }, { "epoch": 2.6, "grad_norm": 9.518628120422363, "learning_rate": 9.338029239384137e-08, "loss": 0.352, "step": 17924 }, { "epoch": 2.6, "grad_norm": 7.958426475524902, "learning_rate": 9.331351662201092e-08, "loss": 0.3421, "step": 17925 }, { "epoch": 2.6, "grad_norm": 9.43387222290039, "learning_rate": 9.324676356573701e-08, "loss": 0.3738, "step": 17926 }, { "epoch": 2.6, "grad_norm": 10.391264915466309, "learning_rate": 9.31800332266921e-08, "loss": 0.4431, "step": 17927 }, { "epoch": 2.6, "grad_norm": 8.499333381652832, "learning_rate": 9.311332560654806e-08, "loss": 0.3612, "step": 17928 }, { "epoch": 2.6, "grad_norm": 8.700089454650879, "learning_rate": 9.304664070697609e-08, "loss": 0.3416, "step": 17929 }, { "epoch": 2.6, "grad_norm": 7.885985374450684, "learning_rate": 9.29799785296469e-08, "loss": 0.293, "step": 17930 }, { "epoch": 2.6, "grad_norm": 8.694849014282227, "learning_rate": 9.291333907623067e-08, "loss": 0.3666, "step": 17931 }, { "epoch": 2.6, "grad_norm": 8.373940467834473, "learning_rate": 9.284672234839674e-08, "loss": 0.3384, "step": 17932 }, { "epoch": 2.6, "grad_norm": 9.440919876098633, "learning_rate": 9.278012834781435e-08, "loss": 0.3527, "step": 17933 }, { "epoch": 2.6, "grad_norm": 8.218305587768555, "learning_rate": 9.271355707615158e-08, "loss": 0.2951, "step": 17934 }, { "epoch": 2.6, "grad_norm": 8.52918815612793, "learning_rate": 9.264700853507668e-08, "loss": 0.3756, "step": 17935 }, { "epoch": 2.6, "grad_norm": 7.857720375061035, "learning_rate": 9.258048272625652e-08, "loss": 0.3213, "step": 17936 }, { "epoch": 2.6, "grad_norm": 7.468168258666992, "learning_rate": 9.2513979651358e-08, "loss": 0.3136, "step": 17937 }, { "epoch": 2.6, "grad_norm": 8.743473052978516, "learning_rate": 9.24474993120472e-08, "loss": 0.311, "step": 17938 }, { "epoch": 2.6, "grad_norm": 8.183623313903809, "learning_rate": 9.238104170998995e-08, "loss": 0.3733, "step": 17939 }, { "epoch": 2.6, "grad_norm": 8.72904109954834, "learning_rate": 9.231460684685055e-08, "loss": 0.3022, "step": 17940 }, { "epoch": 2.6, "grad_norm": 8.810832023620605, "learning_rate": 9.224819472429424e-08, "loss": 0.3247, "step": 17941 }, { "epoch": 2.6, "grad_norm": 7.751648902893066, "learning_rate": 9.218180534398402e-08, "loss": 0.3082, "step": 17942 }, { "epoch": 2.6, "grad_norm": 8.890656471252441, "learning_rate": 9.211543870758409e-08, "loss": 0.365, "step": 17943 }, { "epoch": 2.6, "grad_norm": 10.114587783813477, "learning_rate": 9.204909481675638e-08, "loss": 0.4211, "step": 17944 }, { "epoch": 2.6, "grad_norm": 8.496238708496094, "learning_rate": 9.198277367316343e-08, "loss": 0.336, "step": 17945 }, { "epoch": 2.6, "grad_norm": 8.160256385803223, "learning_rate": 9.191647527846669e-08, "loss": 0.3661, "step": 17946 }, { "epoch": 2.6, "grad_norm": 10.295186996459961, "learning_rate": 9.185019963432716e-08, "loss": 0.4313, "step": 17947 }, { "epoch": 2.6, "grad_norm": 8.15620231628418, "learning_rate": 9.178394674240541e-08, "loss": 0.3671, "step": 17948 }, { "epoch": 2.6, "grad_norm": 7.688289165496826, "learning_rate": 9.1717716604361e-08, "loss": 0.35, "step": 17949 }, { "epoch": 2.6, "grad_norm": 9.482269287109375, "learning_rate": 9.165150922185349e-08, "loss": 0.3715, "step": 17950 }, { "epoch": 2.6, "grad_norm": 8.786588668823242, "learning_rate": 9.158532459654155e-08, "loss": 0.3809, "step": 17951 }, { "epoch": 2.6, "grad_norm": 10.118062973022461, "learning_rate": 9.15191627300832e-08, "loss": 0.4234, "step": 17952 }, { "epoch": 2.6, "grad_norm": 10.473893165588379, "learning_rate": 9.145302362413632e-08, "loss": 0.2773, "step": 17953 }, { "epoch": 2.61, "grad_norm": 8.37747573852539, "learning_rate": 9.138690728035725e-08, "loss": 0.3392, "step": 17954 }, { "epoch": 2.61, "grad_norm": 8.167407989501953, "learning_rate": 9.132081370040335e-08, "loss": 0.3232, "step": 17955 }, { "epoch": 2.61, "grad_norm": 9.322062492370605, "learning_rate": 9.125474288592961e-08, "loss": 0.3985, "step": 17956 }, { "epoch": 2.61, "grad_norm": 8.287249565124512, "learning_rate": 9.118869483859216e-08, "loss": 0.3099, "step": 17957 }, { "epoch": 2.61, "grad_norm": 8.329850196838379, "learning_rate": 9.112266956004489e-08, "loss": 0.3502, "step": 17958 }, { "epoch": 2.61, "grad_norm": 9.08533763885498, "learning_rate": 9.105666705194271e-08, "loss": 0.3443, "step": 17959 }, { "epoch": 2.61, "grad_norm": 9.4273099899292, "learning_rate": 9.099068731593862e-08, "loss": 0.3363, "step": 17960 }, { "epoch": 2.61, "grad_norm": 9.449435234069824, "learning_rate": 9.092473035368576e-08, "loss": 0.3667, "step": 17961 }, { "epoch": 2.61, "grad_norm": 9.420038223266602, "learning_rate": 9.0858796166837e-08, "loss": 0.3917, "step": 17962 }, { "epoch": 2.61, "grad_norm": 8.108037948608398, "learning_rate": 9.079288475704361e-08, "loss": 0.3176, "step": 17963 }, { "epoch": 2.61, "grad_norm": 8.315922737121582, "learning_rate": 9.072699612595757e-08, "loss": 0.3413, "step": 17964 }, { "epoch": 2.61, "grad_norm": 8.455182075500488, "learning_rate": 9.066113027522904e-08, "loss": 0.4182, "step": 17965 }, { "epoch": 2.61, "grad_norm": 8.75938892364502, "learning_rate": 9.059528720650833e-08, "loss": 0.3447, "step": 17966 }, { "epoch": 2.61, "grad_norm": 9.756250381469727, "learning_rate": 9.052946692144503e-08, "loss": 0.4365, "step": 17967 }, { "epoch": 2.61, "grad_norm": 8.742559432983398, "learning_rate": 9.046366942168826e-08, "loss": 0.3489, "step": 17968 }, { "epoch": 2.61, "grad_norm": 8.899429321289062, "learning_rate": 9.039789470888637e-08, "loss": 0.3562, "step": 17969 }, { "epoch": 2.61, "grad_norm": 7.844581604003906, "learning_rate": 9.033214278468737e-08, "loss": 0.3268, "step": 17970 }, { "epoch": 2.61, "grad_norm": 8.454198837280273, "learning_rate": 9.02664136507384e-08, "loss": 0.3287, "step": 17971 }, { "epoch": 2.61, "grad_norm": 9.183432579040527, "learning_rate": 9.020070730868623e-08, "loss": 0.3777, "step": 17972 }, { "epoch": 2.61, "grad_norm": 8.206551551818848, "learning_rate": 9.013502376017711e-08, "loss": 0.3466, "step": 17973 }, { "epoch": 2.61, "grad_norm": 9.26830005645752, "learning_rate": 9.006936300685685e-08, "loss": 0.3309, "step": 17974 }, { "epoch": 2.61, "grad_norm": 9.46252727508545, "learning_rate": 9.000372505036979e-08, "loss": 0.3551, "step": 17975 }, { "epoch": 2.61, "grad_norm": 8.270605087280273, "learning_rate": 8.993810989236117e-08, "loss": 0.3501, "step": 17976 }, { "epoch": 2.61, "grad_norm": 8.504486083984375, "learning_rate": 8.987251753447422e-08, "loss": 0.3583, "step": 17977 }, { "epoch": 2.61, "grad_norm": 9.165709495544434, "learning_rate": 8.980694797835287e-08, "loss": 0.4169, "step": 17978 }, { "epoch": 2.61, "grad_norm": 7.676665782928467, "learning_rate": 8.974140122563912e-08, "loss": 0.33, "step": 17979 }, { "epoch": 2.61, "grad_norm": 8.819141387939453, "learning_rate": 8.967587727797588e-08, "loss": 0.3247, "step": 17980 }, { "epoch": 2.61, "grad_norm": 8.57691478729248, "learning_rate": 8.961037613700429e-08, "loss": 0.3139, "step": 17981 }, { "epoch": 2.61, "grad_norm": 8.924031257629395, "learning_rate": 8.954489780436547e-08, "loss": 0.4069, "step": 17982 }, { "epoch": 2.61, "grad_norm": 8.94969367980957, "learning_rate": 8.947944228170002e-08, "loss": 0.3444, "step": 17983 }, { "epoch": 2.61, "grad_norm": 7.9902753829956055, "learning_rate": 8.94140095706476e-08, "loss": 0.3344, "step": 17984 }, { "epoch": 2.61, "grad_norm": 9.524423599243164, "learning_rate": 8.934859967284759e-08, "loss": 0.4291, "step": 17985 }, { "epoch": 2.61, "grad_norm": 9.352887153625488, "learning_rate": 8.928321258993876e-08, "loss": 0.3799, "step": 17986 }, { "epoch": 2.61, "grad_norm": 8.69830322265625, "learning_rate": 8.921784832355939e-08, "loss": 0.3537, "step": 17987 }, { "epoch": 2.61, "grad_norm": 8.815655708312988, "learning_rate": 8.915250687534693e-08, "loss": 0.2763, "step": 17988 }, { "epoch": 2.61, "grad_norm": 9.119647979736328, "learning_rate": 8.908718824693839e-08, "loss": 0.3495, "step": 17989 }, { "epoch": 2.61, "grad_norm": 8.17315673828125, "learning_rate": 8.902189243997027e-08, "loss": 0.3612, "step": 17990 }, { "epoch": 2.61, "grad_norm": 9.408843040466309, "learning_rate": 8.895661945607846e-08, "loss": 0.428, "step": 17991 }, { "epoch": 2.61, "grad_norm": 9.108174324035645, "learning_rate": 8.889136929689834e-08, "loss": 0.4304, "step": 17992 }, { "epoch": 2.61, "grad_norm": 7.36835241317749, "learning_rate": 8.882614196406446e-08, "loss": 0.3527, "step": 17993 }, { "epoch": 2.61, "grad_norm": 9.714607238769531, "learning_rate": 8.87609374592111e-08, "loss": 0.3899, "step": 17994 }, { "epoch": 2.61, "grad_norm": 9.82163143157959, "learning_rate": 8.869575578397203e-08, "loss": 0.3602, "step": 17995 }, { "epoch": 2.61, "grad_norm": 8.704381942749023, "learning_rate": 8.863059693997976e-08, "loss": 0.2957, "step": 17996 }, { "epoch": 2.61, "grad_norm": 9.148592948913574, "learning_rate": 8.856546092886741e-08, "loss": 0.3086, "step": 17997 }, { "epoch": 2.61, "grad_norm": 8.068196296691895, "learning_rate": 8.850034775226622e-08, "loss": 0.3012, "step": 17998 }, { "epoch": 2.61, "grad_norm": 9.015372276306152, "learning_rate": 8.843525741180802e-08, "loss": 0.3243, "step": 17999 }, { "epoch": 2.61, "grad_norm": 8.701339721679688, "learning_rate": 8.837018990912326e-08, "loss": 0.3251, "step": 18000 }, { "epoch": 2.61, "grad_norm": 8.397549629211426, "learning_rate": 8.830514524584199e-08, "loss": 0.3602, "step": 18001 }, { "epoch": 2.61, "grad_norm": 9.932418823242188, "learning_rate": 8.824012342359411e-08, "loss": 0.4534, "step": 18002 }, { "epoch": 2.61, "grad_norm": 8.31434154510498, "learning_rate": 8.817512444400843e-08, "loss": 0.3301, "step": 18003 }, { "epoch": 2.61, "grad_norm": 10.3525390625, "learning_rate": 8.811014830871343e-08, "loss": 0.4206, "step": 18004 }, { "epoch": 2.61, "grad_norm": 7.331718444824219, "learning_rate": 8.804519501933694e-08, "loss": 0.3125, "step": 18005 }, { "epoch": 2.61, "grad_norm": 9.111034393310547, "learning_rate": 8.79802645775064e-08, "loss": 0.3675, "step": 18006 }, { "epoch": 2.61, "grad_norm": 8.412922859191895, "learning_rate": 8.791535698484842e-08, "loss": 0.3454, "step": 18007 }, { "epoch": 2.61, "grad_norm": 9.661287307739258, "learning_rate": 8.785047224298924e-08, "loss": 0.3978, "step": 18008 }, { "epoch": 2.61, "grad_norm": 9.449637413024902, "learning_rate": 8.778561035355459e-08, "loss": 0.356, "step": 18009 }, { "epoch": 2.61, "grad_norm": 9.658278465270996, "learning_rate": 8.77207713181688e-08, "loss": 0.3436, "step": 18010 }, { "epoch": 2.61, "grad_norm": 8.94321346282959, "learning_rate": 8.765595513845725e-08, "loss": 0.3633, "step": 18011 }, { "epoch": 2.61, "grad_norm": 9.443288803100586, "learning_rate": 8.759116181604287e-08, "loss": 0.3506, "step": 18012 }, { "epoch": 2.61, "grad_norm": 8.890478134155273, "learning_rate": 8.752639135254969e-08, "loss": 0.4233, "step": 18013 }, { "epoch": 2.61, "grad_norm": 9.934646606445312, "learning_rate": 8.746164374960018e-08, "loss": 0.401, "step": 18014 }, { "epoch": 2.61, "grad_norm": 8.491660118103027, "learning_rate": 8.73969190088164e-08, "loss": 0.3626, "step": 18015 }, { "epoch": 2.61, "grad_norm": 8.314986228942871, "learning_rate": 8.733221713182027e-08, "loss": 0.3379, "step": 18016 }, { "epoch": 2.61, "grad_norm": 8.6602144241333, "learning_rate": 8.726753812023201e-08, "loss": 0.3547, "step": 18017 }, { "epoch": 2.61, "grad_norm": 8.112407684326172, "learning_rate": 8.720288197567305e-08, "loss": 0.3099, "step": 18018 }, { "epoch": 2.61, "grad_norm": 8.384207725524902, "learning_rate": 8.713824869976228e-08, "loss": 0.388, "step": 18019 }, { "epoch": 2.61, "grad_norm": 7.425502777099609, "learning_rate": 8.707363829411984e-08, "loss": 0.3103, "step": 18020 }, { "epoch": 2.61, "grad_norm": 9.062649726867676, "learning_rate": 8.70090507603639e-08, "loss": 0.3654, "step": 18021 }, { "epoch": 2.61, "grad_norm": 7.707516193389893, "learning_rate": 8.694448610011273e-08, "loss": 0.3956, "step": 18022 }, { "epoch": 2.62, "grad_norm": 8.702126502990723, "learning_rate": 8.68799443149838e-08, "loss": 0.341, "step": 18023 }, { "epoch": 2.62, "grad_norm": 8.802447319030762, "learning_rate": 8.681542540659426e-08, "loss": 0.363, "step": 18024 }, { "epoch": 2.62, "grad_norm": 9.819302558898926, "learning_rate": 8.675092937656048e-08, "loss": 0.375, "step": 18025 }, { "epoch": 2.62, "grad_norm": 10.389423370361328, "learning_rate": 8.66864562264984e-08, "loss": 0.4018, "step": 18026 }, { "epoch": 2.62, "grad_norm": 9.225327491760254, "learning_rate": 8.662200595802305e-08, "loss": 0.3111, "step": 18027 }, { "epoch": 2.62, "grad_norm": 8.533100128173828, "learning_rate": 8.655757857274926e-08, "loss": 0.3556, "step": 18028 }, { "epoch": 2.62, "grad_norm": 8.436114311218262, "learning_rate": 8.649317407229118e-08, "loss": 0.3588, "step": 18029 }, { "epoch": 2.62, "grad_norm": 8.943601608276367, "learning_rate": 8.64287924582625e-08, "loss": 0.4141, "step": 18030 }, { "epoch": 2.62, "grad_norm": 9.191336631774902, "learning_rate": 8.636443373227564e-08, "loss": 0.3795, "step": 18031 }, { "epoch": 2.62, "grad_norm": 9.832354545593262, "learning_rate": 8.630009789594384e-08, "loss": 0.3471, "step": 18032 }, { "epoch": 2.62, "grad_norm": 7.7481889724731445, "learning_rate": 8.623578495087802e-08, "loss": 0.2909, "step": 18033 }, { "epoch": 2.62, "grad_norm": 8.83651065826416, "learning_rate": 8.617149489869035e-08, "loss": 0.3344, "step": 18034 }, { "epoch": 2.62, "grad_norm": 7.920432090759277, "learning_rate": 8.610722774099077e-08, "loss": 0.3165, "step": 18035 }, { "epoch": 2.62, "grad_norm": 9.819409370422363, "learning_rate": 8.604298347938976e-08, "loss": 0.3459, "step": 18036 }, { "epoch": 2.62, "grad_norm": 9.353538513183594, "learning_rate": 8.59787621154967e-08, "loss": 0.4096, "step": 18037 }, { "epoch": 2.62, "grad_norm": 8.150981903076172, "learning_rate": 8.591456365092076e-08, "loss": 0.336, "step": 18038 }, { "epoch": 2.62, "grad_norm": 8.840657234191895, "learning_rate": 8.585038808727008e-08, "loss": 0.3608, "step": 18039 }, { "epoch": 2.62, "grad_norm": 8.88293743133545, "learning_rate": 8.578623542615248e-08, "loss": 0.3667, "step": 18040 }, { "epoch": 2.62, "grad_norm": 8.460838317871094, "learning_rate": 8.572210566917559e-08, "loss": 0.3251, "step": 18041 }, { "epoch": 2.62, "grad_norm": 8.514413833618164, "learning_rate": 8.565799881794567e-08, "loss": 0.3409, "step": 18042 }, { "epoch": 2.62, "grad_norm": 8.395437240600586, "learning_rate": 8.559391487406898e-08, "loss": 0.3305, "step": 18043 }, { "epoch": 2.62, "grad_norm": 8.893025398254395, "learning_rate": 8.55298538391509e-08, "loss": 0.337, "step": 18044 }, { "epoch": 2.62, "grad_norm": 8.904631614685059, "learning_rate": 8.546581571479661e-08, "loss": 0.358, "step": 18045 }, { "epoch": 2.62, "grad_norm": 9.633286476135254, "learning_rate": 8.540180050261025e-08, "loss": 0.3733, "step": 18046 }, { "epoch": 2.62, "grad_norm": 8.96839714050293, "learning_rate": 8.533780820419567e-08, "loss": 0.3362, "step": 18047 }, { "epoch": 2.62, "grad_norm": 9.143260955810547, "learning_rate": 8.527383882115624e-08, "loss": 0.3471, "step": 18048 }, { "epoch": 2.62, "grad_norm": 8.939697265625, "learning_rate": 8.520989235509446e-08, "loss": 0.34, "step": 18049 }, { "epoch": 2.62, "grad_norm": 8.98071002960205, "learning_rate": 8.514596880761238e-08, "loss": 0.3667, "step": 18050 }, { "epoch": 2.62, "grad_norm": 7.512614727020264, "learning_rate": 8.508206818031182e-08, "loss": 0.3133, "step": 18051 }, { "epoch": 2.62, "grad_norm": 7.918363571166992, "learning_rate": 8.501819047479308e-08, "loss": 0.3236, "step": 18052 }, { "epoch": 2.62, "grad_norm": 8.299077033996582, "learning_rate": 8.495433569265731e-08, "loss": 0.3787, "step": 18053 }, { "epoch": 2.62, "grad_norm": 9.718949317932129, "learning_rate": 8.489050383550345e-08, "loss": 0.3756, "step": 18054 }, { "epoch": 2.62, "grad_norm": 9.420366287231445, "learning_rate": 8.482669490493145e-08, "loss": 0.3418, "step": 18055 }, { "epoch": 2.62, "grad_norm": 7.645281791687012, "learning_rate": 8.476290890253945e-08, "loss": 0.3242, "step": 18056 }, { "epoch": 2.62, "grad_norm": 9.193737983703613, "learning_rate": 8.469914582992577e-08, "loss": 0.3588, "step": 18057 }, { "epoch": 2.62, "grad_norm": 8.963501930236816, "learning_rate": 8.463540568868766e-08, "loss": 0.4283, "step": 18058 }, { "epoch": 2.62, "grad_norm": 7.707941055297852, "learning_rate": 8.457168848042218e-08, "loss": 0.3757, "step": 18059 }, { "epoch": 2.62, "grad_norm": 8.749139785766602, "learning_rate": 8.450799420672561e-08, "loss": 0.3559, "step": 18060 }, { "epoch": 2.62, "grad_norm": 8.468470573425293, "learning_rate": 8.444432286919378e-08, "loss": 0.3288, "step": 18061 }, { "epoch": 2.62, "grad_norm": 8.659062385559082, "learning_rate": 8.438067446942177e-08, "loss": 0.3797, "step": 18062 }, { "epoch": 2.62, "grad_norm": 9.01215934753418, "learning_rate": 8.431704900900427e-08, "loss": 0.3051, "step": 18063 }, { "epoch": 2.62, "grad_norm": 8.881436347961426, "learning_rate": 8.425344648953525e-08, "loss": 0.3656, "step": 18064 }, { "epoch": 2.62, "grad_norm": 8.479085922241211, "learning_rate": 8.41898669126082e-08, "loss": 0.3703, "step": 18065 }, { "epoch": 2.62, "grad_norm": 8.338228225708008, "learning_rate": 8.41263102798161e-08, "loss": 0.3343, "step": 18066 }, { "epoch": 2.62, "grad_norm": 9.908934593200684, "learning_rate": 8.406277659275097e-08, "loss": 0.4543, "step": 18067 }, { "epoch": 2.62, "grad_norm": 9.368916511535645, "learning_rate": 8.399926585300488e-08, "loss": 0.3679, "step": 18068 }, { "epoch": 2.62, "grad_norm": 10.296015739440918, "learning_rate": 8.393577806216878e-08, "loss": 0.3975, "step": 18069 }, { "epoch": 2.62, "grad_norm": 7.934205532073975, "learning_rate": 8.38723132218333e-08, "loss": 0.2886, "step": 18070 }, { "epoch": 2.62, "grad_norm": 8.73447322845459, "learning_rate": 8.38088713335885e-08, "loss": 0.3122, "step": 18071 }, { "epoch": 2.62, "grad_norm": 8.060004234313965, "learning_rate": 8.374545239902397e-08, "loss": 0.3314, "step": 18072 }, { "epoch": 2.62, "grad_norm": 7.788313388824463, "learning_rate": 8.368205641972792e-08, "loss": 0.3145, "step": 18073 }, { "epoch": 2.62, "grad_norm": 7.589546203613281, "learning_rate": 8.36186833972895e-08, "loss": 0.279, "step": 18074 }, { "epoch": 2.62, "grad_norm": 8.173587799072266, "learning_rate": 8.355533333329567e-08, "loss": 0.3635, "step": 18075 }, { "epoch": 2.62, "grad_norm": 8.381993293762207, "learning_rate": 8.349200622933428e-08, "loss": 0.3553, "step": 18076 }, { "epoch": 2.62, "grad_norm": 9.37894058227539, "learning_rate": 8.342870208699127e-08, "loss": 0.3812, "step": 18077 }, { "epoch": 2.62, "grad_norm": 9.62462329864502, "learning_rate": 8.336542090785282e-08, "loss": 0.3475, "step": 18078 }, { "epoch": 2.62, "grad_norm": 7.855075836181641, "learning_rate": 8.330216269350443e-08, "loss": 0.3045, "step": 18079 }, { "epoch": 2.62, "grad_norm": 8.258618354797363, "learning_rate": 8.323892744553085e-08, "loss": 0.3232, "step": 18080 }, { "epoch": 2.62, "grad_norm": 7.658999919891357, "learning_rate": 8.317571516551647e-08, "loss": 0.3194, "step": 18081 }, { "epoch": 2.62, "grad_norm": 7.9886474609375, "learning_rate": 8.31125258550447e-08, "loss": 0.2857, "step": 18082 }, { "epoch": 2.62, "grad_norm": 8.597043991088867, "learning_rate": 8.304935951569891e-08, "loss": 0.3162, "step": 18083 }, { "epoch": 2.62, "grad_norm": 9.002674102783203, "learning_rate": 8.298621614906154e-08, "loss": 0.3684, "step": 18084 }, { "epoch": 2.62, "grad_norm": 8.639618873596191, "learning_rate": 8.292309575671452e-08, "loss": 0.3458, "step": 18085 }, { "epoch": 2.62, "grad_norm": 7.903400421142578, "learning_rate": 8.285999834023938e-08, "loss": 0.3359, "step": 18086 }, { "epoch": 2.62, "grad_norm": 10.059588432312012, "learning_rate": 8.27969239012164e-08, "loss": 0.3826, "step": 18087 }, { "epoch": 2.62, "grad_norm": 8.669896125793457, "learning_rate": 8.273387244122665e-08, "loss": 0.35, "step": 18088 }, { "epoch": 2.62, "grad_norm": 8.048450469970703, "learning_rate": 8.267084396184887e-08, "loss": 0.3254, "step": 18089 }, { "epoch": 2.62, "grad_norm": 8.339471817016602, "learning_rate": 8.260783846466302e-08, "loss": 0.2905, "step": 18090 }, { "epoch": 2.62, "grad_norm": 10.855348587036133, "learning_rate": 8.254485595124694e-08, "loss": 0.4148, "step": 18091 }, { "epoch": 2.63, "grad_norm": 9.393718719482422, "learning_rate": 8.248189642317871e-08, "loss": 0.3715, "step": 18092 }, { "epoch": 2.63, "grad_norm": 9.787887573242188, "learning_rate": 8.241895988203607e-08, "loss": 0.3678, "step": 18093 }, { "epoch": 2.63, "grad_norm": 8.013652801513672, "learning_rate": 8.235604632939508e-08, "loss": 0.3604, "step": 18094 }, { "epoch": 2.63, "grad_norm": 7.784646987915039, "learning_rate": 8.22931557668327e-08, "loss": 0.3455, "step": 18095 }, { "epoch": 2.63, "grad_norm": 8.467514038085938, "learning_rate": 8.223028819592382e-08, "loss": 0.3408, "step": 18096 }, { "epoch": 2.63, "grad_norm": 8.891825675964355, "learning_rate": 8.216744361824424e-08, "loss": 0.375, "step": 18097 }, { "epoch": 2.63, "grad_norm": 17.950551986694336, "learning_rate": 8.210462203536783e-08, "loss": 0.3413, "step": 18098 }, { "epoch": 2.63, "grad_norm": 8.582846641540527, "learning_rate": 8.204182344886868e-08, "loss": 0.3302, "step": 18099 }, { "epoch": 2.63, "grad_norm": 9.510683059692383, "learning_rate": 8.197904786032006e-08, "loss": 0.3505, "step": 18100 }, { "epoch": 2.63, "grad_norm": 8.717695236206055, "learning_rate": 8.191629527129495e-08, "loss": 0.3521, "step": 18101 }, { "epoch": 2.63, "grad_norm": 8.954456329345703, "learning_rate": 8.185356568336521e-08, "loss": 0.3608, "step": 18102 }, { "epoch": 2.63, "grad_norm": 7.709922790527344, "learning_rate": 8.179085909810257e-08, "loss": 0.3222, "step": 18103 }, { "epoch": 2.63, "grad_norm": 7.6084136962890625, "learning_rate": 8.1728175517078e-08, "loss": 0.3113, "step": 18104 }, { "epoch": 2.63, "grad_norm": 9.581089973449707, "learning_rate": 8.166551494186203e-08, "loss": 0.3961, "step": 18105 }, { "epoch": 2.63, "grad_norm": 7.929973125457764, "learning_rate": 8.160287737402437e-08, "loss": 0.373, "step": 18106 }, { "epoch": 2.63, "grad_norm": 9.569615364074707, "learning_rate": 8.15402628151346e-08, "loss": 0.3953, "step": 18107 }, { "epoch": 2.63, "grad_norm": 9.433273315429688, "learning_rate": 8.147767126676075e-08, "loss": 0.3871, "step": 18108 }, { "epoch": 2.63, "grad_norm": 8.736184120178223, "learning_rate": 8.141510273047191e-08, "loss": 0.3731, "step": 18109 }, { "epoch": 2.63, "grad_norm": 8.747417449951172, "learning_rate": 8.135255720783474e-08, "loss": 0.3476, "step": 18110 }, { "epoch": 2.63, "grad_norm": 9.20209789276123, "learning_rate": 8.129003470041684e-08, "loss": 0.4029, "step": 18111 }, { "epoch": 2.63, "grad_norm": 7.634273052215576, "learning_rate": 8.122753520978432e-08, "loss": 0.3036, "step": 18112 }, { "epoch": 2.63, "grad_norm": 8.928032875061035, "learning_rate": 8.116505873750301e-08, "loss": 0.3355, "step": 18113 }, { "epoch": 2.63, "grad_norm": 7.815375804901123, "learning_rate": 8.110260528513824e-08, "loss": 0.2871, "step": 18114 }, { "epoch": 2.63, "grad_norm": 8.144587516784668, "learning_rate": 8.104017485425463e-08, "loss": 0.3203, "step": 18115 }, { "epoch": 2.63, "grad_norm": 8.585762977600098, "learning_rate": 8.097776744641626e-08, "loss": 0.3413, "step": 18116 }, { "epoch": 2.63, "grad_norm": 8.350834846496582, "learning_rate": 8.091538306318645e-08, "loss": 0.338, "step": 18117 }, { "epoch": 2.63, "grad_norm": 8.794785499572754, "learning_rate": 8.085302170612884e-08, "loss": 0.3201, "step": 18118 }, { "epoch": 2.63, "grad_norm": 8.471633911132812, "learning_rate": 8.079068337680506e-08, "loss": 0.3282, "step": 18119 }, { "epoch": 2.63, "grad_norm": 7.891509056091309, "learning_rate": 8.072836807677707e-08, "loss": 0.3426, "step": 18120 }, { "epoch": 2.63, "grad_norm": 9.619361877441406, "learning_rate": 8.066607580760631e-08, "loss": 0.3592, "step": 18121 }, { "epoch": 2.63, "grad_norm": 9.036459922790527, "learning_rate": 8.060380657085319e-08, "loss": 0.3583, "step": 18122 }, { "epoch": 2.63, "grad_norm": 8.302240371704102, "learning_rate": 8.05415603680778e-08, "loss": 0.3587, "step": 18123 }, { "epoch": 2.63, "grad_norm": 8.552373886108398, "learning_rate": 8.047933720083976e-08, "loss": 0.3712, "step": 18124 }, { "epoch": 2.63, "grad_norm": 9.108497619628906, "learning_rate": 8.041713707069774e-08, "loss": 0.3424, "step": 18125 }, { "epoch": 2.63, "grad_norm": 8.05280590057373, "learning_rate": 8.035495997921027e-08, "loss": 0.341, "step": 18126 }, { "epoch": 2.63, "grad_norm": 8.04253101348877, "learning_rate": 8.029280592793497e-08, "loss": 0.3345, "step": 18127 }, { "epoch": 2.63, "grad_norm": 8.053315162658691, "learning_rate": 8.023067491842927e-08, "loss": 0.3232, "step": 18128 }, { "epoch": 2.63, "grad_norm": 8.673057556152344, "learning_rate": 8.016856695224917e-08, "loss": 0.3247, "step": 18129 }, { "epoch": 2.63, "grad_norm": 9.542259216308594, "learning_rate": 8.010648203095138e-08, "loss": 0.3711, "step": 18130 }, { "epoch": 2.63, "grad_norm": 9.565154075622559, "learning_rate": 8.00444201560907e-08, "loss": 0.4261, "step": 18131 }, { "epoch": 2.63, "grad_norm": 9.787549018859863, "learning_rate": 7.998238132922264e-08, "loss": 0.3142, "step": 18132 }, { "epoch": 2.63, "grad_norm": 9.113149642944336, "learning_rate": 7.992036555190096e-08, "loss": 0.3828, "step": 18133 }, { "epoch": 2.63, "grad_norm": 8.953264236450195, "learning_rate": 7.985837282567954e-08, "loss": 0.303, "step": 18134 }, { "epoch": 2.63, "grad_norm": 9.110892295837402, "learning_rate": 7.979640315211155e-08, "loss": 0.3533, "step": 18135 }, { "epoch": 2.63, "grad_norm": 8.276338577270508, "learning_rate": 7.973445653274946e-08, "loss": 0.3441, "step": 18136 }, { "epoch": 2.63, "grad_norm": 8.224212646484375, "learning_rate": 7.967253296914533e-08, "loss": 0.302, "step": 18137 }, { "epoch": 2.63, "grad_norm": 9.74586009979248, "learning_rate": 7.961063246285049e-08, "loss": 0.313, "step": 18138 }, { "epoch": 2.63, "grad_norm": 7.706568241119385, "learning_rate": 7.95487550154158e-08, "loss": 0.2985, "step": 18139 }, { "epoch": 2.63, "grad_norm": 8.214006423950195, "learning_rate": 7.948690062839148e-08, "loss": 0.3188, "step": 18140 }, { "epoch": 2.63, "grad_norm": 8.867640495300293, "learning_rate": 7.942506930332715e-08, "loss": 0.325, "step": 18141 }, { "epoch": 2.63, "grad_norm": 9.464118003845215, "learning_rate": 7.936326104177204e-08, "loss": 0.361, "step": 18142 }, { "epoch": 2.63, "grad_norm": 9.198821067810059, "learning_rate": 7.930147584527435e-08, "loss": 0.3474, "step": 18143 }, { "epoch": 2.63, "grad_norm": 7.891025543212891, "learning_rate": 7.92397137153823e-08, "loss": 0.2914, "step": 18144 }, { "epoch": 2.63, "grad_norm": 10.411744117736816, "learning_rate": 7.917797465364317e-08, "loss": 0.4436, "step": 18145 }, { "epoch": 2.63, "grad_norm": 8.507781982421875, "learning_rate": 7.911625866160365e-08, "loss": 0.3352, "step": 18146 }, { "epoch": 2.63, "grad_norm": 9.636757850646973, "learning_rate": 7.905456574081026e-08, "loss": 0.3182, "step": 18147 }, { "epoch": 2.63, "grad_norm": 9.473784446716309, "learning_rate": 7.899289589280789e-08, "loss": 0.3993, "step": 18148 }, { "epoch": 2.63, "grad_norm": 8.41756820678711, "learning_rate": 7.893124911914251e-08, "loss": 0.3246, "step": 18149 }, { "epoch": 2.63, "grad_norm": 9.187124252319336, "learning_rate": 7.886962542135767e-08, "loss": 0.3227, "step": 18150 }, { "epoch": 2.63, "grad_norm": 7.6359639167785645, "learning_rate": 7.880802480099802e-08, "loss": 0.3345, "step": 18151 }, { "epoch": 2.63, "grad_norm": 9.44265365600586, "learning_rate": 7.874644725960622e-08, "loss": 0.3906, "step": 18152 }, { "epoch": 2.63, "grad_norm": 9.807957649230957, "learning_rate": 7.868489279872558e-08, "loss": 0.3653, "step": 18153 }, { "epoch": 2.63, "grad_norm": 9.180715560913086, "learning_rate": 7.86233614198979e-08, "loss": 0.3328, "step": 18154 }, { "epoch": 2.63, "grad_norm": 8.553266525268555, "learning_rate": 7.85618531246648e-08, "loss": 0.3871, "step": 18155 }, { "epoch": 2.63, "grad_norm": 8.53841495513916, "learning_rate": 7.850036791456727e-08, "loss": 0.3421, "step": 18156 }, { "epoch": 2.63, "grad_norm": 8.298638343811035, "learning_rate": 7.843890579114565e-08, "loss": 0.3073, "step": 18157 }, { "epoch": 2.63, "grad_norm": 9.32925033569336, "learning_rate": 7.837746675594003e-08, "loss": 0.3889, "step": 18158 }, { "epoch": 2.63, "grad_norm": 7.859634876251221, "learning_rate": 7.831605081048943e-08, "loss": 0.3161, "step": 18159 }, { "epoch": 2.63, "grad_norm": 8.722838401794434, "learning_rate": 7.825465795633256e-08, "loss": 0.346, "step": 18160 }, { "epoch": 2.64, "grad_norm": 8.050166130065918, "learning_rate": 7.81932881950076e-08, "loss": 0.2921, "step": 18161 }, { "epoch": 2.64, "grad_norm": 9.222953796386719, "learning_rate": 7.813194152805202e-08, "loss": 0.3753, "step": 18162 }, { "epoch": 2.64, "grad_norm": 10.47433853149414, "learning_rate": 7.807061795700298e-08, "loss": 0.346, "step": 18163 }, { "epoch": 2.64, "grad_norm": 9.159646034240723, "learning_rate": 7.800931748339623e-08, "loss": 0.3237, "step": 18164 }, { "epoch": 2.64, "grad_norm": 8.650748252868652, "learning_rate": 7.794804010876833e-08, "loss": 0.3225, "step": 18165 }, { "epoch": 2.64, "grad_norm": 7.918169975280762, "learning_rate": 7.78867858346537e-08, "loss": 0.3218, "step": 18166 }, { "epoch": 2.64, "grad_norm": 9.71899700164795, "learning_rate": 7.782555466258777e-08, "loss": 0.3202, "step": 18167 }, { "epoch": 2.64, "grad_norm": 8.558807373046875, "learning_rate": 7.776434659410403e-08, "loss": 0.3227, "step": 18168 }, { "epoch": 2.64, "grad_norm": 8.826353073120117, "learning_rate": 7.770316163073586e-08, "loss": 0.3978, "step": 18169 }, { "epoch": 2.64, "grad_norm": 8.391024589538574, "learning_rate": 7.764199977401687e-08, "loss": 0.3652, "step": 18170 }, { "epoch": 2.64, "grad_norm": 9.37454605102539, "learning_rate": 7.758086102547844e-08, "loss": 0.3196, "step": 18171 }, { "epoch": 2.64, "grad_norm": 9.378217697143555, "learning_rate": 7.751974538665317e-08, "loss": 0.3626, "step": 18172 }, { "epoch": 2.64, "grad_norm": 8.765921592712402, "learning_rate": 7.74586528590716e-08, "loss": 0.3612, "step": 18173 }, { "epoch": 2.64, "grad_norm": 7.461207389831543, "learning_rate": 7.73975834442645e-08, "loss": 0.3569, "step": 18174 }, { "epoch": 2.64, "grad_norm": 8.29815673828125, "learning_rate": 7.733653714376198e-08, "loss": 0.3994, "step": 18175 }, { "epoch": 2.64, "grad_norm": 8.56029224395752, "learning_rate": 7.727551395909327e-08, "loss": 0.3232, "step": 18176 }, { "epoch": 2.64, "grad_norm": 10.097784042358398, "learning_rate": 7.721451389178723e-08, "loss": 0.389, "step": 18177 }, { "epoch": 2.64, "grad_norm": 8.698579788208008, "learning_rate": 7.715353694337235e-08, "loss": 0.3531, "step": 18178 }, { "epoch": 2.64, "grad_norm": 11.714735984802246, "learning_rate": 7.709258311537604e-08, "loss": 0.4684, "step": 18179 }, { "epoch": 2.64, "grad_norm": 8.904850959777832, "learning_rate": 7.703165240932541e-08, "loss": 0.3737, "step": 18180 }, { "epoch": 2.64, "grad_norm": 8.455222129821777, "learning_rate": 7.697074482674715e-08, "loss": 0.3577, "step": 18181 }, { "epoch": 2.64, "grad_norm": 9.261375427246094, "learning_rate": 7.690986036916724e-08, "loss": 0.31, "step": 18182 }, { "epoch": 2.64, "grad_norm": 8.211348533630371, "learning_rate": 7.684899903811059e-08, "loss": 0.3972, "step": 18183 }, { "epoch": 2.64, "grad_norm": 9.61980152130127, "learning_rate": 7.678816083510275e-08, "loss": 0.3949, "step": 18184 }, { "epoch": 2.64, "grad_norm": 8.856720924377441, "learning_rate": 7.672734576166695e-08, "loss": 0.3393, "step": 18185 }, { "epoch": 2.64, "grad_norm": 8.990071296691895, "learning_rate": 7.666655381932785e-08, "loss": 0.3227, "step": 18186 }, { "epoch": 2.64, "grad_norm": 7.725500583648682, "learning_rate": 7.660578500960757e-08, "loss": 0.3385, "step": 18187 }, { "epoch": 2.64, "grad_norm": 7.72682523727417, "learning_rate": 7.654503933402933e-08, "loss": 0.3212, "step": 18188 }, { "epoch": 2.64, "grad_norm": 9.506243705749512, "learning_rate": 7.648431679411449e-08, "loss": 0.3414, "step": 18189 }, { "epoch": 2.64, "grad_norm": 9.396066665649414, "learning_rate": 7.64236173913846e-08, "loss": 0.3924, "step": 18190 }, { "epoch": 2.64, "grad_norm": 8.255547523498535, "learning_rate": 7.63629411273602e-08, "loss": 0.341, "step": 18191 }, { "epoch": 2.64, "grad_norm": 7.927854537963867, "learning_rate": 7.630228800356164e-08, "loss": 0.3609, "step": 18192 }, { "epoch": 2.64, "grad_norm": 8.255609512329102, "learning_rate": 7.62416580215084e-08, "loss": 0.3093, "step": 18193 }, { "epoch": 2.64, "grad_norm": 8.574400901794434, "learning_rate": 7.618105118271944e-08, "loss": 0.3363, "step": 18194 }, { "epoch": 2.64, "grad_norm": 8.62270450592041, "learning_rate": 7.612046748871326e-08, "loss": 0.3164, "step": 18195 }, { "epoch": 2.64, "grad_norm": 7.640068531036377, "learning_rate": 7.605990694100761e-08, "loss": 0.3273, "step": 18196 }, { "epoch": 2.64, "grad_norm": 8.221510887145996, "learning_rate": 7.599936954111974e-08, "loss": 0.2986, "step": 18197 }, { "epoch": 2.64, "grad_norm": 10.241133689880371, "learning_rate": 7.593885529056632e-08, "loss": 0.3788, "step": 18198 }, { "epoch": 2.64, "grad_norm": 8.051037788391113, "learning_rate": 7.587836419086335e-08, "loss": 0.3234, "step": 18199 }, { "epoch": 2.64, "grad_norm": 7.2649455070495605, "learning_rate": 7.581789624352653e-08, "loss": 0.2778, "step": 18200 }, { "epoch": 2.64, "grad_norm": 8.398710250854492, "learning_rate": 7.575745145007062e-08, "loss": 0.31, "step": 18201 }, { "epoch": 2.64, "grad_norm": 9.780464172363281, "learning_rate": 7.56970298120101e-08, "loss": 0.3868, "step": 18202 }, { "epoch": 2.64, "grad_norm": 8.28754997253418, "learning_rate": 7.563663133085873e-08, "loss": 0.325, "step": 18203 }, { "epoch": 2.64, "grad_norm": 8.31530475616455, "learning_rate": 7.557625600812934e-08, "loss": 0.3193, "step": 18204 }, { "epoch": 2.64, "grad_norm": 8.028007507324219, "learning_rate": 7.551590384533523e-08, "loss": 0.386, "step": 18205 }, { "epoch": 2.64, "grad_norm": 9.394448280334473, "learning_rate": 7.545557484398757e-08, "loss": 0.3258, "step": 18206 }, { "epoch": 2.64, "grad_norm": 7.766368389129639, "learning_rate": 7.539526900559867e-08, "loss": 0.2896, "step": 18207 }, { "epoch": 2.64, "grad_norm": 8.579423904418945, "learning_rate": 7.533498633167867e-08, "loss": 0.39, "step": 18208 }, { "epoch": 2.64, "grad_norm": 9.384928703308105, "learning_rate": 7.527472682373847e-08, "loss": 0.3641, "step": 18209 }, { "epoch": 2.64, "grad_norm": 8.846685409545898, "learning_rate": 7.521449048328721e-08, "loss": 0.3514, "step": 18210 }, { "epoch": 2.64, "grad_norm": 7.9807305335998535, "learning_rate": 7.515427731183433e-08, "loss": 0.3397, "step": 18211 }, { "epoch": 2.64, "grad_norm": 7.707273483276367, "learning_rate": 7.50940873108884e-08, "loss": 0.3055, "step": 18212 }, { "epoch": 2.64, "grad_norm": 9.926129341125488, "learning_rate": 7.503392048195723e-08, "loss": 0.3329, "step": 18213 }, { "epoch": 2.64, "grad_norm": 9.116583824157715, "learning_rate": 7.497377682654826e-08, "loss": 0.3631, "step": 18214 }, { "epoch": 2.64, "grad_norm": 9.197214126586914, "learning_rate": 7.491365634616831e-08, "loss": 0.4286, "step": 18215 }, { "epoch": 2.64, "grad_norm": 9.033109664916992, "learning_rate": 7.48535590423236e-08, "loss": 0.4046, "step": 18216 }, { "epoch": 2.64, "grad_norm": 7.515629768371582, "learning_rate": 7.479348491651971e-08, "loss": 0.302, "step": 18217 }, { "epoch": 2.64, "grad_norm": 8.070588111877441, "learning_rate": 7.473343397026189e-08, "loss": 0.3639, "step": 18218 }, { "epoch": 2.64, "grad_norm": 8.408302307128906, "learning_rate": 7.467340620505447e-08, "loss": 0.3308, "step": 18219 }, { "epoch": 2.64, "grad_norm": 8.145371437072754, "learning_rate": 7.461340162240115e-08, "loss": 0.347, "step": 18220 }, { "epoch": 2.64, "grad_norm": 8.159777641296387, "learning_rate": 7.455342022380573e-08, "loss": 0.326, "step": 18221 }, { "epoch": 2.64, "grad_norm": 8.009827613830566, "learning_rate": 7.449346201077045e-08, "loss": 0.3024, "step": 18222 }, { "epoch": 2.64, "grad_norm": 10.061186790466309, "learning_rate": 7.443352698479777e-08, "loss": 0.354, "step": 18223 }, { "epoch": 2.64, "grad_norm": 8.80652904510498, "learning_rate": 7.437361514738927e-08, "loss": 0.3679, "step": 18224 }, { "epoch": 2.64, "grad_norm": 8.956058502197266, "learning_rate": 7.431372650004563e-08, "loss": 0.3502, "step": 18225 }, { "epoch": 2.64, "grad_norm": 10.207218170166016, "learning_rate": 7.425386104426779e-08, "loss": 0.3698, "step": 18226 }, { "epoch": 2.64, "grad_norm": 9.039634704589844, "learning_rate": 7.419401878155495e-08, "loss": 0.2746, "step": 18227 }, { "epoch": 2.64, "grad_norm": 8.188892364501953, "learning_rate": 7.413419971340707e-08, "loss": 0.3336, "step": 18228 }, { "epoch": 2.64, "grad_norm": 10.622422218322754, "learning_rate": 7.407440384132224e-08, "loss": 0.4308, "step": 18229 }, { "epoch": 2.65, "grad_norm": 10.699012756347656, "learning_rate": 7.401463116679874e-08, "loss": 0.3784, "step": 18230 }, { "epoch": 2.65, "grad_norm": 9.012019157409668, "learning_rate": 7.395488169133412e-08, "loss": 0.3551, "step": 18231 }, { "epoch": 2.65, "grad_norm": 8.058557510375977, "learning_rate": 7.389515541642532e-08, "loss": 0.3294, "step": 18232 }, { "epoch": 2.65, "grad_norm": 9.325948715209961, "learning_rate": 7.383545234356858e-08, "loss": 0.403, "step": 18233 }, { "epoch": 2.65, "grad_norm": 8.313699722290039, "learning_rate": 7.37757724742597e-08, "loss": 0.3288, "step": 18234 }, { "epoch": 2.65, "grad_norm": 7.625829696655273, "learning_rate": 7.371611580999393e-08, "loss": 0.3575, "step": 18235 }, { "epoch": 2.65, "grad_norm": 8.333171844482422, "learning_rate": 7.365648235226574e-08, "loss": 0.3065, "step": 18236 }, { "epoch": 2.65, "grad_norm": 9.102927207946777, "learning_rate": 7.359687210256926e-08, "loss": 0.3538, "step": 18237 }, { "epoch": 2.65, "grad_norm": 8.917389869689941, "learning_rate": 7.353728506239809e-08, "loss": 0.3358, "step": 18238 }, { "epoch": 2.65, "grad_norm": 9.455699920654297, "learning_rate": 7.347772123324459e-08, "loss": 0.386, "step": 18239 }, { "epoch": 2.65, "grad_norm": 9.123337745666504, "learning_rate": 7.341818061660165e-08, "loss": 0.3683, "step": 18240 }, { "epoch": 2.65, "grad_norm": 7.83175802230835, "learning_rate": 7.335866321396045e-08, "loss": 0.3382, "step": 18241 }, { "epoch": 2.65, "grad_norm": 9.023067474365234, "learning_rate": 7.329916902681244e-08, "loss": 0.3503, "step": 18242 }, { "epoch": 2.65, "grad_norm": 9.115602493286133, "learning_rate": 7.323969805664787e-08, "loss": 0.3761, "step": 18243 }, { "epoch": 2.65, "grad_norm": 8.2354736328125, "learning_rate": 7.3180250304957e-08, "loss": 0.4044, "step": 18244 }, { "epoch": 2.65, "grad_norm": 8.449377059936523, "learning_rate": 7.312082577322898e-08, "loss": 0.3638, "step": 18245 }, { "epoch": 2.65, "grad_norm": 8.228157997131348, "learning_rate": 7.30614244629526e-08, "loss": 0.3737, "step": 18246 }, { "epoch": 2.65, "grad_norm": 8.841957092285156, "learning_rate": 7.300204637561613e-08, "loss": 0.3523, "step": 18247 }, { "epoch": 2.65, "grad_norm": 8.202956199645996, "learning_rate": 7.294269151270704e-08, "loss": 0.3096, "step": 18248 }, { "epoch": 2.65, "grad_norm": 8.233297348022461, "learning_rate": 7.288335987571282e-08, "loss": 0.3299, "step": 18249 }, { "epoch": 2.65, "grad_norm": 8.59618091583252, "learning_rate": 7.282405146611936e-08, "loss": 0.3745, "step": 18250 }, { "epoch": 2.65, "grad_norm": 10.138616561889648, "learning_rate": 7.276476628541295e-08, "loss": 0.4488, "step": 18251 }, { "epoch": 2.65, "grad_norm": 8.941075325012207, "learning_rate": 7.27055043350786e-08, "loss": 0.3153, "step": 18252 }, { "epoch": 2.65, "grad_norm": 8.40293025970459, "learning_rate": 7.264626561660114e-08, "loss": 0.303, "step": 18253 }, { "epoch": 2.65, "grad_norm": 9.071053504943848, "learning_rate": 7.258705013146482e-08, "loss": 0.353, "step": 18254 }, { "epoch": 2.65, "grad_norm": 8.020041465759277, "learning_rate": 7.25278578811529e-08, "loss": 0.3315, "step": 18255 }, { "epoch": 2.65, "grad_norm": 8.254247665405273, "learning_rate": 7.246868886714863e-08, "loss": 0.3205, "step": 18256 }, { "epoch": 2.65, "grad_norm": 9.565224647521973, "learning_rate": 7.240954309093428e-08, "loss": 0.3452, "step": 18257 }, { "epoch": 2.65, "grad_norm": 7.95283317565918, "learning_rate": 7.235042055399165e-08, "loss": 0.3478, "step": 18258 }, { "epoch": 2.65, "grad_norm": 8.056352615356445, "learning_rate": 7.229132125780214e-08, "loss": 0.3418, "step": 18259 }, { "epoch": 2.65, "grad_norm": 9.23999309539795, "learning_rate": 7.223224520384574e-08, "loss": 0.3438, "step": 18260 }, { "epoch": 2.65, "grad_norm": 8.028152465820312, "learning_rate": 7.217319239360343e-08, "loss": 0.3439, "step": 18261 }, { "epoch": 2.65, "grad_norm": 8.45829963684082, "learning_rate": 7.211416282855388e-08, "loss": 0.3569, "step": 18262 }, { "epoch": 2.65, "grad_norm": 9.788196563720703, "learning_rate": 7.205515651017668e-08, "loss": 0.3869, "step": 18263 }, { "epoch": 2.65, "grad_norm": 7.749876499176025, "learning_rate": 7.199617343994957e-08, "loss": 0.343, "step": 18264 }, { "epoch": 2.65, "grad_norm": 8.726302146911621, "learning_rate": 7.193721361935046e-08, "loss": 0.3195, "step": 18265 }, { "epoch": 2.65, "grad_norm": 8.612802505493164, "learning_rate": 7.18782770498566e-08, "loss": 0.3334, "step": 18266 }, { "epoch": 2.65, "grad_norm": 8.533066749572754, "learning_rate": 7.18193637329444e-08, "loss": 0.3575, "step": 18267 }, { "epoch": 2.65, "grad_norm": 8.842912673950195, "learning_rate": 7.176047367008997e-08, "loss": 0.3791, "step": 18268 }, { "epoch": 2.65, "grad_norm": 8.212881088256836, "learning_rate": 7.17016068627686e-08, "loss": 0.356, "step": 18269 }, { "epoch": 2.65, "grad_norm": 8.751415252685547, "learning_rate": 7.164276331245512e-08, "loss": 0.355, "step": 18270 }, { "epoch": 2.65, "grad_norm": 9.894251823425293, "learning_rate": 7.158394302062387e-08, "loss": 0.3799, "step": 18271 }, { "epoch": 2.65, "grad_norm": 8.83701229095459, "learning_rate": 7.152514598874837e-08, "loss": 0.409, "step": 18272 }, { "epoch": 2.65, "grad_norm": 8.770073890686035, "learning_rate": 7.146637221830187e-08, "loss": 0.3032, "step": 18273 }, { "epoch": 2.65, "grad_norm": 8.317999839782715, "learning_rate": 7.140762171075632e-08, "loss": 0.3378, "step": 18274 }, { "epoch": 2.65, "grad_norm": 9.075569152832031, "learning_rate": 7.13488944675843e-08, "loss": 0.3713, "step": 18275 }, { "epoch": 2.65, "grad_norm": 7.673696994781494, "learning_rate": 7.129019049025675e-08, "loss": 0.3182, "step": 18276 }, { "epoch": 2.65, "grad_norm": 8.903751373291016, "learning_rate": 7.123150978024439e-08, "loss": 0.3945, "step": 18277 }, { "epoch": 2.65, "grad_norm": 8.78769302368164, "learning_rate": 7.11728523390176e-08, "loss": 0.339, "step": 18278 }, { "epoch": 2.65, "grad_norm": 10.275157928466797, "learning_rate": 7.111421816804575e-08, "loss": 0.3466, "step": 18279 }, { "epoch": 2.65, "grad_norm": 8.349578857421875, "learning_rate": 7.105560726879811e-08, "loss": 0.3757, "step": 18280 }, { "epoch": 2.65, "grad_norm": 9.485868453979492, "learning_rate": 7.099701964274252e-08, "loss": 0.3534, "step": 18281 }, { "epoch": 2.65, "grad_norm": 9.432260513305664, "learning_rate": 7.093845529134734e-08, "loss": 0.4363, "step": 18282 }, { "epoch": 2.65, "grad_norm": 8.248774528503418, "learning_rate": 7.087991421607942e-08, "loss": 0.3575, "step": 18283 }, { "epoch": 2.65, "grad_norm": 6.94913387298584, "learning_rate": 7.082139641840578e-08, "loss": 0.3033, "step": 18284 }, { "epoch": 2.65, "grad_norm": 9.071357727050781, "learning_rate": 7.076290189979228e-08, "loss": 0.3014, "step": 18285 }, { "epoch": 2.65, "grad_norm": 9.469451904296875, "learning_rate": 7.070443066170429e-08, "loss": 0.3656, "step": 18286 }, { "epoch": 2.65, "grad_norm": 9.500536918640137, "learning_rate": 7.064598270560695e-08, "loss": 0.3742, "step": 18287 }, { "epoch": 2.65, "grad_norm": 9.796295166015625, "learning_rate": 7.058755803296435e-08, "loss": 0.4475, "step": 18288 }, { "epoch": 2.65, "grad_norm": 8.521655082702637, "learning_rate": 7.052915664524039e-08, "loss": 0.3125, "step": 18289 }, { "epoch": 2.65, "grad_norm": 8.696442604064941, "learning_rate": 7.047077854389826e-08, "loss": 0.3471, "step": 18290 }, { "epoch": 2.65, "grad_norm": 8.426397323608398, "learning_rate": 7.041242373040036e-08, "loss": 0.3725, "step": 18291 }, { "epoch": 2.65, "grad_norm": 8.946405410766602, "learning_rate": 7.03540922062088e-08, "loss": 0.334, "step": 18292 }, { "epoch": 2.65, "grad_norm": 8.684229850769043, "learning_rate": 7.029578397278501e-08, "loss": 0.3256, "step": 18293 }, { "epoch": 2.65, "grad_norm": 9.02669620513916, "learning_rate": 7.023749903158982e-08, "loss": 0.3344, "step": 18294 }, { "epoch": 2.65, "grad_norm": 8.765791893005371, "learning_rate": 7.017923738408305e-08, "loss": 0.3827, "step": 18295 }, { "epoch": 2.65, "grad_norm": 7.957056522369385, "learning_rate": 7.012099903172498e-08, "loss": 0.3386, "step": 18296 }, { "epoch": 2.65, "grad_norm": 9.499618530273438, "learning_rate": 7.006278397597421e-08, "loss": 0.3866, "step": 18297 }, { "epoch": 2.65, "grad_norm": 9.059029579162598, "learning_rate": 7.000459221828959e-08, "loss": 0.3649, "step": 18298 }, { "epoch": 2.66, "grad_norm": 8.261229515075684, "learning_rate": 6.994642376012849e-08, "loss": 0.2908, "step": 18299 }, { "epoch": 2.66, "grad_norm": 9.946552276611328, "learning_rate": 6.988827860294877e-08, "loss": 0.3249, "step": 18300 }, { "epoch": 2.66, "grad_norm": 8.132217407226562, "learning_rate": 6.983015674820713e-08, "loss": 0.3733, "step": 18301 }, { "epoch": 2.66, "grad_norm": 7.826810836791992, "learning_rate": 6.977205819735932e-08, "loss": 0.3389, "step": 18302 }, { "epoch": 2.66, "grad_norm": 9.200140953063965, "learning_rate": 6.971398295186127e-08, "loss": 0.3764, "step": 18303 }, { "epoch": 2.66, "grad_norm": 8.732656478881836, "learning_rate": 6.965593101316758e-08, "loss": 0.3431, "step": 18304 }, { "epoch": 2.66, "grad_norm": 7.974307537078857, "learning_rate": 6.959790238273321e-08, "loss": 0.3571, "step": 18305 }, { "epoch": 2.66, "grad_norm": 10.080772399902344, "learning_rate": 6.953989706201158e-08, "loss": 0.3876, "step": 18306 }, { "epoch": 2.66, "grad_norm": 8.293657302856445, "learning_rate": 6.948191505245582e-08, "loss": 0.296, "step": 18307 }, { "epoch": 2.66, "grad_norm": 8.218497276306152, "learning_rate": 6.942395635551889e-08, "loss": 0.3472, "step": 18308 }, { "epoch": 2.66, "grad_norm": 8.180269241333008, "learning_rate": 6.936602097265265e-08, "loss": 0.3369, "step": 18309 }, { "epoch": 2.66, "grad_norm": 8.873533248901367, "learning_rate": 6.930810890530858e-08, "loss": 0.3343, "step": 18310 }, { "epoch": 2.66, "grad_norm": 10.22191333770752, "learning_rate": 6.925022015493764e-08, "loss": 0.4238, "step": 18311 }, { "epoch": 2.66, "grad_norm": 8.610321998596191, "learning_rate": 6.919235472299023e-08, "loss": 0.3376, "step": 18312 }, { "epoch": 2.66, "grad_norm": 8.033997535705566, "learning_rate": 6.913451261091585e-08, "loss": 0.3473, "step": 18313 }, { "epoch": 2.66, "grad_norm": 8.830273628234863, "learning_rate": 6.90766938201638e-08, "loss": 0.3761, "step": 18314 }, { "epoch": 2.66, "grad_norm": 7.777235984802246, "learning_rate": 6.901889835218278e-08, "loss": 0.3284, "step": 18315 }, { "epoch": 2.66, "grad_norm": 8.172065734863281, "learning_rate": 6.896112620842032e-08, "loss": 0.3288, "step": 18316 }, { "epoch": 2.66, "grad_norm": 8.363879203796387, "learning_rate": 6.890337739032425e-08, "loss": 0.3635, "step": 18317 }, { "epoch": 2.66, "grad_norm": 9.107080459594727, "learning_rate": 6.884565189934088e-08, "loss": 0.3544, "step": 18318 }, { "epoch": 2.66, "grad_norm": 9.844552040100098, "learning_rate": 6.878794973691715e-08, "loss": 0.3211, "step": 18319 }, { "epoch": 2.66, "grad_norm": 9.373915672302246, "learning_rate": 6.873027090449801e-08, "loss": 0.3343, "step": 18320 }, { "epoch": 2.66, "grad_norm": 7.985744953155518, "learning_rate": 6.867261540352887e-08, "loss": 0.3512, "step": 18321 }, { "epoch": 2.66, "grad_norm": 8.266443252563477, "learning_rate": 6.861498323545412e-08, "loss": 0.3324, "step": 18322 }, { "epoch": 2.66, "grad_norm": 9.261617660522461, "learning_rate": 6.855737440171749e-08, "loss": 0.3829, "step": 18323 }, { "epoch": 2.66, "grad_norm": 9.67325496673584, "learning_rate": 6.849978890376251e-08, "loss": 0.3927, "step": 18324 }, { "epoch": 2.66, "grad_norm": 8.498173713684082, "learning_rate": 6.844222674303179e-08, "loss": 0.3423, "step": 18325 }, { "epoch": 2.66, "grad_norm": 9.281722068786621, "learning_rate": 6.838468792096741e-08, "loss": 0.361, "step": 18326 }, { "epoch": 2.66, "grad_norm": 7.876410484313965, "learning_rate": 6.832717243901098e-08, "loss": 0.2618, "step": 18327 }, { "epoch": 2.66, "grad_norm": 8.485240936279297, "learning_rate": 6.826968029860347e-08, "loss": 0.3371, "step": 18328 }, { "epoch": 2.66, "grad_norm": 9.414475440979004, "learning_rate": 6.821221150118517e-08, "loss": 0.3809, "step": 18329 }, { "epoch": 2.66, "grad_norm": 8.526283264160156, "learning_rate": 6.815476604819593e-08, "loss": 0.3737, "step": 18330 }, { "epoch": 2.66, "grad_norm": 9.524401664733887, "learning_rate": 6.809734394107492e-08, "loss": 0.3662, "step": 18331 }, { "epoch": 2.66, "grad_norm": 7.854797840118408, "learning_rate": 6.803994518126077e-08, "loss": 0.3035, "step": 18332 }, { "epoch": 2.66, "grad_norm": 10.198211669921875, "learning_rate": 6.798256977019157e-08, "loss": 0.3469, "step": 18333 }, { "epoch": 2.66, "grad_norm": 8.415775299072266, "learning_rate": 6.79252177093047e-08, "loss": 0.3195, "step": 18334 }, { "epoch": 2.66, "grad_norm": 8.55953598022461, "learning_rate": 6.786788900003703e-08, "loss": 0.316, "step": 18335 }, { "epoch": 2.66, "grad_norm": 8.274456977844238, "learning_rate": 6.781058364382497e-08, "loss": 0.292, "step": 18336 }, { "epoch": 2.66, "grad_norm": 9.830477714538574, "learning_rate": 6.77533016421038e-08, "loss": 0.3701, "step": 18337 }, { "epoch": 2.66, "grad_norm": 8.806917190551758, "learning_rate": 6.769604299630926e-08, "loss": 0.3428, "step": 18338 }, { "epoch": 2.66, "grad_norm": 8.444988250732422, "learning_rate": 6.763880770787522e-08, "loss": 0.3434, "step": 18339 }, { "epoch": 2.66, "grad_norm": 9.124860763549805, "learning_rate": 6.758159577823619e-08, "loss": 0.3386, "step": 18340 }, { "epoch": 2.66, "grad_norm": 8.798238754272461, "learning_rate": 6.752440720882513e-08, "loss": 0.3534, "step": 18341 }, { "epoch": 2.66, "grad_norm": 9.285045623779297, "learning_rate": 6.74672420010749e-08, "loss": 0.3597, "step": 18342 }, { "epoch": 2.66, "grad_norm": 8.91014575958252, "learning_rate": 6.741010015641768e-08, "loss": 0.3164, "step": 18343 }, { "epoch": 2.66, "grad_norm": 8.894627571105957, "learning_rate": 6.735298167628523e-08, "loss": 0.3535, "step": 18344 }, { "epoch": 2.66, "grad_norm": 7.689477920532227, "learning_rate": 6.729588656210828e-08, "loss": 0.2793, "step": 18345 }, { "epoch": 2.66, "grad_norm": 8.298973083496094, "learning_rate": 6.723881481531757e-08, "loss": 0.3216, "step": 18346 }, { "epoch": 2.66, "grad_norm": 9.331277847290039, "learning_rate": 6.718176643734263e-08, "loss": 0.415, "step": 18347 }, { "epoch": 2.66, "grad_norm": 8.122303009033203, "learning_rate": 6.712474142961289e-08, "loss": 0.3334, "step": 18348 }, { "epoch": 2.66, "grad_norm": 8.749746322631836, "learning_rate": 6.706773979355707e-08, "loss": 0.4073, "step": 18349 }, { "epoch": 2.66, "grad_norm": 10.031025886535645, "learning_rate": 6.701076153060326e-08, "loss": 0.2891, "step": 18350 }, { "epoch": 2.66, "grad_norm": 8.42928695678711, "learning_rate": 6.695380664217864e-08, "loss": 0.3667, "step": 18351 }, { "epoch": 2.66, "grad_norm": 8.464865684509277, "learning_rate": 6.689687512971043e-08, "loss": 0.3228, "step": 18352 }, { "epoch": 2.66, "grad_norm": 8.637625694274902, "learning_rate": 6.683996699462491e-08, "loss": 0.3646, "step": 18353 }, { "epoch": 2.66, "grad_norm": 8.412308692932129, "learning_rate": 6.678308223834772e-08, "loss": 0.3559, "step": 18354 }, { "epoch": 2.66, "grad_norm": 10.683859825134277, "learning_rate": 6.672622086230417e-08, "loss": 0.3738, "step": 18355 }, { "epoch": 2.66, "grad_norm": 8.434393882751465, "learning_rate": 6.666938286791879e-08, "loss": 0.3566, "step": 18356 }, { "epoch": 2.66, "grad_norm": 8.442468643188477, "learning_rate": 6.661256825661554e-08, "loss": 0.3863, "step": 18357 }, { "epoch": 2.66, "grad_norm": 8.6541166305542, "learning_rate": 6.655577702981763e-08, "loss": 0.3303, "step": 18358 }, { "epoch": 2.66, "grad_norm": 8.836934089660645, "learning_rate": 6.649900918894825e-08, "loss": 0.3139, "step": 18359 }, { "epoch": 2.66, "grad_norm": 8.050285339355469, "learning_rate": 6.644226473542913e-08, "loss": 0.3353, "step": 18360 }, { "epoch": 2.66, "grad_norm": 9.046486854553223, "learning_rate": 6.63855436706825e-08, "loss": 0.3423, "step": 18361 }, { "epoch": 2.66, "grad_norm": 8.000395774841309, "learning_rate": 6.632884599612898e-08, "loss": 0.3032, "step": 18362 }, { "epoch": 2.66, "grad_norm": 8.424233436584473, "learning_rate": 6.627217171318911e-08, "loss": 0.3408, "step": 18363 }, { "epoch": 2.66, "grad_norm": 9.183974266052246, "learning_rate": 6.621552082328297e-08, "loss": 0.3005, "step": 18364 }, { "epoch": 2.66, "grad_norm": 7.6683783531188965, "learning_rate": 6.615889332782953e-08, "loss": 0.3436, "step": 18365 }, { "epoch": 2.66, "grad_norm": 7.6922149658203125, "learning_rate": 6.610228922824779e-08, "loss": 0.3318, "step": 18366 }, { "epoch": 2.66, "grad_norm": 8.949416160583496, "learning_rate": 6.604570852595581e-08, "loss": 0.3293, "step": 18367 }, { "epoch": 2.67, "grad_norm": 9.195728302001953, "learning_rate": 6.598915122237103e-08, "loss": 0.3629, "step": 18368 }, { "epoch": 2.67, "grad_norm": 8.937418937683105, "learning_rate": 6.593261731891042e-08, "loss": 0.3463, "step": 18369 }, { "epoch": 2.67, "grad_norm": 9.738859176635742, "learning_rate": 6.587610681699041e-08, "loss": 0.3256, "step": 18370 }, { "epoch": 2.67, "grad_norm": 8.272248268127441, "learning_rate": 6.581961971802697e-08, "loss": 0.3081, "step": 18371 }, { "epoch": 2.67, "grad_norm": 8.06747055053711, "learning_rate": 6.576315602343462e-08, "loss": 0.3886, "step": 18372 }, { "epoch": 2.67, "grad_norm": 8.347393035888672, "learning_rate": 6.570671573462882e-08, "loss": 0.3119, "step": 18373 }, { "epoch": 2.67, "grad_norm": 9.48254108428955, "learning_rate": 6.565029885302286e-08, "loss": 0.3804, "step": 18374 }, { "epoch": 2.67, "grad_norm": 7.994141578674316, "learning_rate": 6.559390538003085e-08, "loss": 0.3354, "step": 18375 }, { "epoch": 2.67, "grad_norm": 7.700717449188232, "learning_rate": 6.553753531706518e-08, "loss": 0.3389, "step": 18376 }, { "epoch": 2.67, "grad_norm": 7.437779426574707, "learning_rate": 6.54811886655381e-08, "loss": 0.3327, "step": 18377 }, { "epoch": 2.67, "grad_norm": 8.228520393371582, "learning_rate": 6.542486542686154e-08, "loss": 0.3711, "step": 18378 }, { "epoch": 2.67, "grad_norm": 9.471943855285645, "learning_rate": 6.536856560244619e-08, "loss": 0.3707, "step": 18379 }, { "epoch": 2.67, "grad_norm": 8.191656112670898, "learning_rate": 6.531228919370313e-08, "loss": 0.3292, "step": 18380 }, { "epoch": 2.67, "grad_norm": 7.190318584442139, "learning_rate": 6.525603620204168e-08, "loss": 0.3134, "step": 18381 }, { "epoch": 2.67, "grad_norm": 8.502252578735352, "learning_rate": 6.519980662887182e-08, "loss": 0.3692, "step": 18382 }, { "epoch": 2.67, "grad_norm": 8.064539909362793, "learning_rate": 6.514360047560164e-08, "loss": 0.3662, "step": 18383 }, { "epoch": 2.67, "grad_norm": 9.902885437011719, "learning_rate": 6.508741774363958e-08, "loss": 0.3375, "step": 18384 }, { "epoch": 2.67, "grad_norm": 8.43366527557373, "learning_rate": 6.503125843439328e-08, "loss": 0.3225, "step": 18385 }, { "epoch": 2.67, "grad_norm": 8.804101943969727, "learning_rate": 6.497512254926952e-08, "loss": 0.337, "step": 18386 }, { "epoch": 2.67, "grad_norm": 7.9471306800842285, "learning_rate": 6.491901008967493e-08, "loss": 0.3322, "step": 18387 }, { "epoch": 2.67, "grad_norm": 8.788260459899902, "learning_rate": 6.486292105701508e-08, "loss": 0.3744, "step": 18388 }, { "epoch": 2.67, "grad_norm": 8.506789207458496, "learning_rate": 6.480685545269538e-08, "loss": 0.3455, "step": 18389 }, { "epoch": 2.67, "grad_norm": 8.873658180236816, "learning_rate": 6.475081327812037e-08, "loss": 0.3911, "step": 18390 }, { "epoch": 2.67, "grad_norm": 8.4937744140625, "learning_rate": 6.469479453469417e-08, "loss": 0.3348, "step": 18391 }, { "epoch": 2.67, "grad_norm": 9.677522659301758, "learning_rate": 6.463879922382021e-08, "loss": 0.3464, "step": 18392 }, { "epoch": 2.67, "grad_norm": 8.040783882141113, "learning_rate": 6.458282734690112e-08, "loss": 0.3685, "step": 18393 }, { "epoch": 2.67, "grad_norm": 8.0587739944458, "learning_rate": 6.45268789053397e-08, "loss": 0.3743, "step": 18394 }, { "epoch": 2.67, "grad_norm": 8.93565559387207, "learning_rate": 6.447095390053702e-08, "loss": 0.3311, "step": 18395 }, { "epoch": 2.67, "grad_norm": 8.854642868041992, "learning_rate": 6.441505233389476e-08, "loss": 0.2969, "step": 18396 }, { "epoch": 2.67, "grad_norm": 8.198555946350098, "learning_rate": 6.435917420681314e-08, "loss": 0.3441, "step": 18397 }, { "epoch": 2.67, "grad_norm": 10.021251678466797, "learning_rate": 6.430331952069213e-08, "loss": 0.3035, "step": 18398 }, { "epoch": 2.67, "grad_norm": 7.811655044555664, "learning_rate": 6.424748827693105e-08, "loss": 0.3304, "step": 18399 }, { "epoch": 2.67, "grad_norm": 9.242981910705566, "learning_rate": 6.41916804769288e-08, "loss": 0.3829, "step": 18400 }, { "epoch": 2.67, "grad_norm": 9.058913230895996, "learning_rate": 6.413589612208337e-08, "loss": 0.3874, "step": 18401 }, { "epoch": 2.67, "grad_norm": 7.6205339431762695, "learning_rate": 6.408013521379252e-08, "loss": 0.3388, "step": 18402 }, { "epoch": 2.67, "grad_norm": 9.401857376098633, "learning_rate": 6.402439775345315e-08, "loss": 0.3989, "step": 18403 }, { "epoch": 2.67, "grad_norm": 9.50480842590332, "learning_rate": 6.396868374246167e-08, "loss": 0.4284, "step": 18404 }, { "epoch": 2.67, "grad_norm": 9.056693077087402, "learning_rate": 6.391299318221399e-08, "loss": 0.3806, "step": 18405 }, { "epoch": 2.67, "grad_norm": 7.833053112030029, "learning_rate": 6.385732607410521e-08, "loss": 0.3414, "step": 18406 }, { "epoch": 2.67, "grad_norm": 8.989176750183105, "learning_rate": 6.38016824195301e-08, "loss": 0.3289, "step": 18407 }, { "epoch": 2.67, "grad_norm": 8.529706954956055, "learning_rate": 6.374606221988265e-08, "loss": 0.3818, "step": 18408 }, { "epoch": 2.67, "grad_norm": 8.938543319702148, "learning_rate": 6.369046547655632e-08, "loss": 0.3799, "step": 18409 }, { "epoch": 2.67, "grad_norm": 9.730822563171387, "learning_rate": 6.363489219094409e-08, "loss": 0.3951, "step": 18410 }, { "epoch": 2.67, "grad_norm": 10.717233657836914, "learning_rate": 6.35793423644384e-08, "loss": 0.4064, "step": 18411 }, { "epoch": 2.67, "grad_norm": 8.887145042419434, "learning_rate": 6.352381599843037e-08, "loss": 0.3205, "step": 18412 }, { "epoch": 2.67, "grad_norm": 9.437104225158691, "learning_rate": 6.346831309431188e-08, "loss": 0.3224, "step": 18413 }, { "epoch": 2.67, "grad_norm": 8.28493595123291, "learning_rate": 6.341283365347283e-08, "loss": 0.2994, "step": 18414 }, { "epoch": 2.67, "grad_norm": 8.494297981262207, "learning_rate": 6.335737767730365e-08, "loss": 0.3708, "step": 18415 }, { "epoch": 2.67, "grad_norm": 9.714263916015625, "learning_rate": 6.330194516719323e-08, "loss": 0.321, "step": 18416 }, { "epoch": 2.67, "grad_norm": 8.654831886291504, "learning_rate": 6.32465361245309e-08, "loss": 0.3436, "step": 18417 }, { "epoch": 2.67, "grad_norm": 10.478723526000977, "learning_rate": 6.319115055070435e-08, "loss": 0.4111, "step": 18418 }, { "epoch": 2.67, "grad_norm": 8.16499137878418, "learning_rate": 6.313578844710143e-08, "loss": 0.3577, "step": 18419 }, { "epoch": 2.67, "grad_norm": 10.674276351928711, "learning_rate": 6.308044981510908e-08, "loss": 0.3825, "step": 18420 }, { "epoch": 2.67, "grad_norm": 9.162012100219727, "learning_rate": 6.30251346561137e-08, "loss": 0.39, "step": 18421 }, { "epoch": 2.67, "grad_norm": 8.91270923614502, "learning_rate": 6.29698429715011e-08, "loss": 0.3397, "step": 18422 }, { "epoch": 2.67, "grad_norm": 9.731548309326172, "learning_rate": 6.291457476265671e-08, "loss": 0.3407, "step": 18423 }, { "epoch": 2.67, "grad_norm": 8.493090629577637, "learning_rate": 6.285933003096489e-08, "loss": 0.3451, "step": 18424 }, { "epoch": 2.67, "grad_norm": 10.002613067626953, "learning_rate": 6.280410877780995e-08, "loss": 0.3358, "step": 18425 }, { "epoch": 2.67, "grad_norm": 7.680624008178711, "learning_rate": 6.274891100457524e-08, "loss": 0.3368, "step": 18426 }, { "epoch": 2.67, "grad_norm": 10.390645980834961, "learning_rate": 6.269373671264389e-08, "loss": 0.378, "step": 18427 }, { "epoch": 2.67, "grad_norm": 8.450353622436523, "learning_rate": 6.263858590339766e-08, "loss": 0.3612, "step": 18428 }, { "epoch": 2.67, "grad_norm": 9.031100273132324, "learning_rate": 6.2583458578219e-08, "loss": 0.3397, "step": 18429 }, { "epoch": 2.67, "grad_norm": 9.943380355834961, "learning_rate": 6.252835473848827e-08, "loss": 0.459, "step": 18430 }, { "epoch": 2.67, "grad_norm": 8.501812934875488, "learning_rate": 6.247327438558648e-08, "loss": 0.3357, "step": 18431 }, { "epoch": 2.67, "grad_norm": 8.277039527893066, "learning_rate": 6.241821752089371e-08, "loss": 0.3426, "step": 18432 }, { "epoch": 2.67, "grad_norm": 10.437604904174805, "learning_rate": 6.236318414578879e-08, "loss": 0.3945, "step": 18433 }, { "epoch": 2.67, "grad_norm": 8.943693161010742, "learning_rate": 6.230817426165102e-08, "loss": 0.3335, "step": 18434 }, { "epoch": 2.67, "grad_norm": 8.035942077636719, "learning_rate": 6.225318786985811e-08, "loss": 0.3463, "step": 18435 }, { "epoch": 2.67, "grad_norm": 9.110092163085938, "learning_rate": 6.219822497178817e-08, "loss": 0.3898, "step": 18436 }, { "epoch": 2.68, "grad_norm": 8.381755828857422, "learning_rate": 6.214328556881765e-08, "loss": 0.3303, "step": 18437 }, { "epoch": 2.68, "grad_norm": 8.32037353515625, "learning_rate": 6.208836966232356e-08, "loss": 0.3331, "step": 18438 }, { "epoch": 2.68, "grad_norm": 9.802080154418945, "learning_rate": 6.203347725368124e-08, "loss": 0.3207, "step": 18439 }, { "epoch": 2.68, "grad_norm": 9.246161460876465, "learning_rate": 6.197860834426616e-08, "loss": 0.3959, "step": 18440 }, { "epoch": 2.68, "grad_norm": 8.229052543640137, "learning_rate": 6.192376293545287e-08, "loss": 0.3257, "step": 18441 }, { "epoch": 2.68, "grad_norm": 8.85287094116211, "learning_rate": 6.186894102861561e-08, "loss": 0.3409, "step": 18442 }, { "epoch": 2.68, "grad_norm": 8.90902328491211, "learning_rate": 6.181414262512763e-08, "loss": 0.3384, "step": 18443 }, { "epoch": 2.68, "grad_norm": 9.408581733703613, "learning_rate": 6.175936772636192e-08, "loss": 0.3546, "step": 18444 }, { "epoch": 2.68, "grad_norm": 7.9188432693481445, "learning_rate": 6.170461633369073e-08, "loss": 0.3196, "step": 18445 }, { "epoch": 2.68, "grad_norm": 9.315783500671387, "learning_rate": 6.164988844848584e-08, "loss": 0.3378, "step": 18446 }, { "epoch": 2.68, "grad_norm": 9.148000717163086, "learning_rate": 6.159518407211839e-08, "loss": 0.3244, "step": 18447 }, { "epoch": 2.68, "grad_norm": 9.051039695739746, "learning_rate": 6.154050320595894e-08, "loss": 0.3541, "step": 18448 }, { "epoch": 2.68, "grad_norm": 8.561197280883789, "learning_rate": 6.148584585137705e-08, "loss": 0.3355, "step": 18449 }, { "epoch": 2.68, "grad_norm": 8.903907775878906, "learning_rate": 6.143121200974278e-08, "loss": 0.3289, "step": 18450 }, { "epoch": 2.68, "grad_norm": 7.688458442687988, "learning_rate": 6.137660168242409e-08, "loss": 0.296, "step": 18451 }, { "epoch": 2.68, "grad_norm": 9.369006156921387, "learning_rate": 6.132201487078981e-08, "loss": 0.4533, "step": 18452 }, { "epoch": 2.68, "grad_norm": 8.039156913757324, "learning_rate": 6.126745157620717e-08, "loss": 0.3675, "step": 18453 }, { "epoch": 2.68, "grad_norm": 8.657293319702148, "learning_rate": 6.121291180004317e-08, "loss": 0.3472, "step": 18454 }, { "epoch": 2.68, "grad_norm": 8.076233863830566, "learning_rate": 6.115839554366431e-08, "loss": 0.3401, "step": 18455 }, { "epoch": 2.68, "grad_norm": 8.480975151062012, "learning_rate": 6.110390280843647e-08, "loss": 0.3266, "step": 18456 }, { "epoch": 2.68, "grad_norm": 7.931779861450195, "learning_rate": 6.104943359572479e-08, "loss": 0.3176, "step": 18457 }, { "epoch": 2.68, "grad_norm": 8.840096473693848, "learning_rate": 6.099498790689384e-08, "loss": 0.3452, "step": 18458 }, { "epoch": 2.68, "grad_norm": 10.15411376953125, "learning_rate": 6.094056574330775e-08, "loss": 0.3745, "step": 18459 }, { "epoch": 2.68, "grad_norm": 8.895123481750488, "learning_rate": 6.08861671063301e-08, "loss": 0.3983, "step": 18460 }, { "epoch": 2.68, "grad_norm": 8.807391166687012, "learning_rate": 6.083179199732347e-08, "loss": 0.3419, "step": 18461 }, { "epoch": 2.68, "grad_norm": 8.014769554138184, "learning_rate": 6.077744041765042e-08, "loss": 0.2952, "step": 18462 }, { "epoch": 2.68, "grad_norm": 8.329156875610352, "learning_rate": 6.072311236867244e-08, "loss": 0.3659, "step": 18463 }, { "epoch": 2.68, "grad_norm": 9.11790943145752, "learning_rate": 6.066880785175078e-08, "loss": 0.354, "step": 18464 }, { "epoch": 2.68, "grad_norm": 8.343578338623047, "learning_rate": 6.061452686824586e-08, "loss": 0.3533, "step": 18465 }, { "epoch": 2.68, "grad_norm": 8.256368637084961, "learning_rate": 6.056026941951764e-08, "loss": 0.3451, "step": 18466 }, { "epoch": 2.68, "grad_norm": 8.487526893615723, "learning_rate": 6.050603550692556e-08, "loss": 0.3399, "step": 18467 }, { "epoch": 2.68, "grad_norm": 8.974677085876465, "learning_rate": 6.0451825131828e-08, "loss": 0.3406, "step": 18468 }, { "epoch": 2.68, "grad_norm": 9.566031455993652, "learning_rate": 6.039763829558364e-08, "loss": 0.3885, "step": 18469 }, { "epoch": 2.68, "grad_norm": 8.808845520019531, "learning_rate": 6.03434749995494e-08, "loss": 0.3682, "step": 18470 }, { "epoch": 2.68, "grad_norm": 7.806467533111572, "learning_rate": 6.028933524508295e-08, "loss": 0.3463, "step": 18471 }, { "epoch": 2.68, "grad_norm": 8.19799518585205, "learning_rate": 6.023521903354012e-08, "loss": 0.3497, "step": 18472 }, { "epoch": 2.68, "grad_norm": 8.555007934570312, "learning_rate": 6.018112636627704e-08, "loss": 0.3801, "step": 18473 }, { "epoch": 2.68, "grad_norm": 8.742657661437988, "learning_rate": 6.012705724464883e-08, "loss": 0.3715, "step": 18474 }, { "epoch": 2.68, "grad_norm": 7.460643768310547, "learning_rate": 6.007301167001e-08, "loss": 0.3833, "step": 18475 }, { "epoch": 2.68, "grad_norm": 8.558518409729004, "learning_rate": 6.001898964371455e-08, "loss": 0.3472, "step": 18476 }, { "epoch": 2.68, "grad_norm": 8.199697494506836, "learning_rate": 5.996499116711607e-08, "loss": 0.2938, "step": 18477 }, { "epoch": 2.68, "grad_norm": 8.283099174499512, "learning_rate": 5.991101624156736e-08, "loss": 0.3477, "step": 18478 }, { "epoch": 2.68, "grad_norm": 9.48093318939209, "learning_rate": 5.985706486842069e-08, "loss": 0.3493, "step": 18479 }, { "epoch": 2.68, "grad_norm": 8.362138748168945, "learning_rate": 5.980313704902762e-08, "loss": 0.3745, "step": 18480 }, { "epoch": 2.68, "grad_norm": 8.355843544006348, "learning_rate": 5.974923278473931e-08, "loss": 0.3821, "step": 18481 }, { "epoch": 2.68, "grad_norm": 10.11260986328125, "learning_rate": 5.969535207690624e-08, "loss": 0.3995, "step": 18482 }, { "epoch": 2.68, "grad_norm": 8.42371940612793, "learning_rate": 5.964149492687831e-08, "loss": 0.3398, "step": 18483 }, { "epoch": 2.68, "grad_norm": 10.12846851348877, "learning_rate": 5.958766133600468e-08, "loss": 0.3932, "step": 18484 }, { "epoch": 2.68, "grad_norm": 9.379569053649902, "learning_rate": 5.9533851305634373e-08, "loss": 0.3336, "step": 18485 }, { "epoch": 2.68, "grad_norm": 9.279128074645996, "learning_rate": 5.9480064837115205e-08, "loss": 0.3628, "step": 18486 }, { "epoch": 2.68, "grad_norm": 8.25709056854248, "learning_rate": 5.942630193179488e-08, "loss": 0.2908, "step": 18487 }, { "epoch": 2.68, "grad_norm": 8.50184154510498, "learning_rate": 5.937256259102041e-08, "loss": 0.3114, "step": 18488 }, { "epoch": 2.68, "grad_norm": 8.239307403564453, "learning_rate": 5.931884681613763e-08, "loss": 0.3164, "step": 18489 }, { "epoch": 2.68, "grad_norm": 9.869053840637207, "learning_rate": 5.926515460849313e-08, "loss": 0.4212, "step": 18490 }, { "epoch": 2.68, "grad_norm": 7.863636016845703, "learning_rate": 5.921148596943137e-08, "loss": 0.3649, "step": 18491 }, { "epoch": 2.68, "grad_norm": 9.786776542663574, "learning_rate": 5.9157840900297404e-08, "loss": 0.3516, "step": 18492 }, { "epoch": 2.68, "grad_norm": 8.000350952148438, "learning_rate": 5.910421940243493e-08, "loss": 0.3653, "step": 18493 }, { "epoch": 2.68, "grad_norm": 8.06833267211914, "learning_rate": 5.9050621477187425e-08, "loss": 0.3099, "step": 18494 }, { "epoch": 2.68, "grad_norm": 7.635280132293701, "learning_rate": 5.8997047125897704e-08, "loss": 0.3361, "step": 18495 }, { "epoch": 2.68, "grad_norm": 8.816384315490723, "learning_rate": 5.8943496349908026e-08, "loss": 0.3954, "step": 18496 }, { "epoch": 2.68, "grad_norm": 8.473454475402832, "learning_rate": 5.8889969150559994e-08, "loss": 0.4112, "step": 18497 }, { "epoch": 2.68, "grad_norm": 7.997554302215576, "learning_rate": 5.883646552919463e-08, "loss": 0.3286, "step": 18498 }, { "epoch": 2.68, "grad_norm": 8.842535972595215, "learning_rate": 5.8782985487152434e-08, "loss": 0.3374, "step": 18499 }, { "epoch": 2.68, "grad_norm": 8.588313102722168, "learning_rate": 5.8729529025773217e-08, "loss": 0.371, "step": 18500 }, { "epoch": 2.68, "grad_norm": 9.91080379486084, "learning_rate": 5.867609614639624e-08, "loss": 0.3551, "step": 18501 }, { "epoch": 2.68, "grad_norm": 9.437287330627441, "learning_rate": 5.8622686850360206e-08, "loss": 0.3471, "step": 18502 }, { "epoch": 2.68, "grad_norm": 8.036788940429688, "learning_rate": 5.856930113900305e-08, "loss": 0.3913, "step": 18503 }, { "epoch": 2.68, "grad_norm": 7.843446731567383, "learning_rate": 5.8515939013662585e-08, "loss": 0.2904, "step": 18504 }, { "epoch": 2.68, "grad_norm": 9.505169868469238, "learning_rate": 5.8462600475675304e-08, "loss": 0.4167, "step": 18505 }, { "epoch": 2.69, "grad_norm": 7.976242542266846, "learning_rate": 5.840928552637792e-08, "loss": 0.3526, "step": 18506 }, { "epoch": 2.69, "grad_norm": 7.889284610748291, "learning_rate": 5.8355994167105685e-08, "loss": 0.3525, "step": 18507 }, { "epoch": 2.69, "grad_norm": 7.347414493560791, "learning_rate": 5.830272639919443e-08, "loss": 0.3163, "step": 18508 }, { "epoch": 2.69, "grad_norm": 9.173338890075684, "learning_rate": 5.8249482223977967e-08, "loss": 0.3513, "step": 18509 }, { "epoch": 2.69, "grad_norm": 7.584873676300049, "learning_rate": 5.8196261642790455e-08, "loss": 0.2941, "step": 18510 }, { "epoch": 2.69, "grad_norm": 8.75149154663086, "learning_rate": 5.814306465696561e-08, "loss": 0.3273, "step": 18511 }, { "epoch": 2.69, "grad_norm": 7.8777174949646, "learning_rate": 5.80898912678357e-08, "loss": 0.3608, "step": 18512 }, { "epoch": 2.69, "grad_norm": 9.966426849365234, "learning_rate": 5.803674147673332e-08, "loss": 0.3916, "step": 18513 }, { "epoch": 2.69, "grad_norm": 7.6297078132629395, "learning_rate": 5.798361528498974e-08, "loss": 0.3623, "step": 18514 }, { "epoch": 2.69, "grad_norm": 7.934813022613525, "learning_rate": 5.793051269393612e-08, "loss": 0.3305, "step": 18515 }, { "epoch": 2.69, "grad_norm": 8.399996757507324, "learning_rate": 5.787743370490272e-08, "loss": 0.3396, "step": 18516 }, { "epoch": 2.69, "grad_norm": 8.047842025756836, "learning_rate": 5.782437831921949e-08, "loss": 0.3143, "step": 18517 }, { "epoch": 2.69, "grad_norm": 8.56404972076416, "learning_rate": 5.777134653821558e-08, "loss": 0.2736, "step": 18518 }, { "epoch": 2.69, "grad_norm": 8.087594032287598, "learning_rate": 5.7718338363219596e-08, "loss": 0.3239, "step": 18519 }, { "epoch": 2.69, "grad_norm": 8.788841247558594, "learning_rate": 5.766535379555959e-08, "loss": 0.3204, "step": 18520 }, { "epoch": 2.69, "grad_norm": 7.909934997558594, "learning_rate": 5.761239283656305e-08, "loss": 0.3104, "step": 18521 }, { "epoch": 2.69, "grad_norm": 8.822746276855469, "learning_rate": 5.7559455487556805e-08, "loss": 0.3991, "step": 18522 }, { "epoch": 2.69, "grad_norm": 9.118988037109375, "learning_rate": 5.7506541749867246e-08, "loss": 0.3657, "step": 18523 }, { "epoch": 2.69, "grad_norm": 9.311624526977539, "learning_rate": 5.745365162481952e-08, "loss": 0.3658, "step": 18524 }, { "epoch": 2.69, "grad_norm": 8.67203426361084, "learning_rate": 5.740078511373936e-08, "loss": 0.3738, "step": 18525 }, { "epoch": 2.69, "grad_norm": 8.055644989013672, "learning_rate": 5.734794221795081e-08, "loss": 0.3164, "step": 18526 }, { "epoch": 2.69, "grad_norm": 8.014646530151367, "learning_rate": 5.729512293877814e-08, "loss": 0.3583, "step": 18527 }, { "epoch": 2.69, "grad_norm": 7.668041706085205, "learning_rate": 5.7242327277544075e-08, "loss": 0.3272, "step": 18528 }, { "epoch": 2.69, "grad_norm": 10.076898574829102, "learning_rate": 5.7189555235572004e-08, "loss": 0.423, "step": 18529 }, { "epoch": 2.69, "grad_norm": 8.53522777557373, "learning_rate": 5.713680681418365e-08, "loss": 0.3434, "step": 18530 }, { "epoch": 2.69, "grad_norm": 8.298986434936523, "learning_rate": 5.70840820147005e-08, "loss": 0.27, "step": 18531 }, { "epoch": 2.69, "grad_norm": 10.285896301269531, "learning_rate": 5.703138083844361e-08, "loss": 0.4338, "step": 18532 }, { "epoch": 2.69, "grad_norm": 8.042593955993652, "learning_rate": 5.6978703286733376e-08, "loss": 0.3746, "step": 18533 }, { "epoch": 2.69, "grad_norm": 8.639464378356934, "learning_rate": 5.692604936088952e-08, "loss": 0.338, "step": 18534 }, { "epoch": 2.69, "grad_norm": 8.920218467712402, "learning_rate": 5.687341906223109e-08, "loss": 0.3909, "step": 18535 }, { "epoch": 2.69, "grad_norm": 10.3900785446167, "learning_rate": 5.68208123920767e-08, "loss": 0.3886, "step": 18536 }, { "epoch": 2.69, "grad_norm": 8.63029956817627, "learning_rate": 5.6768229351744413e-08, "loss": 0.3178, "step": 18537 }, { "epoch": 2.69, "grad_norm": 7.9969305992126465, "learning_rate": 5.67156699425515e-08, "loss": 0.3543, "step": 18538 }, { "epoch": 2.69, "grad_norm": 7.317241668701172, "learning_rate": 5.6663134165814805e-08, "loss": 0.3838, "step": 18539 }, { "epoch": 2.69, "grad_norm": 8.494653701782227, "learning_rate": 5.661062202285061e-08, "loss": 0.3277, "step": 18540 }, { "epoch": 2.69, "grad_norm": 7.378537654876709, "learning_rate": 5.6558133514974296e-08, "loss": 0.282, "step": 18541 }, { "epoch": 2.69, "grad_norm": 10.91257381439209, "learning_rate": 5.650566864350115e-08, "loss": 0.4209, "step": 18542 }, { "epoch": 2.69, "grad_norm": 7.699037551879883, "learning_rate": 5.645322740974534e-08, "loss": 0.2991, "step": 18543 }, { "epoch": 2.69, "grad_norm": 8.212045669555664, "learning_rate": 5.640080981502104e-08, "loss": 0.2756, "step": 18544 }, { "epoch": 2.69, "grad_norm": 8.516181945800781, "learning_rate": 5.634841586064099e-08, "loss": 0.4057, "step": 18545 }, { "epoch": 2.69, "grad_norm": 9.568922996520996, "learning_rate": 5.629604554791834e-08, "loss": 0.3994, "step": 18546 }, { "epoch": 2.69, "grad_norm": 9.142863273620605, "learning_rate": 5.6243698878164605e-08, "loss": 0.2907, "step": 18547 }, { "epoch": 2.69, "grad_norm": 7.3155951499938965, "learning_rate": 5.619137585269185e-08, "loss": 0.3381, "step": 18548 }, { "epoch": 2.69, "grad_norm": 11.437921524047852, "learning_rate": 5.613907647281058e-08, "loss": 0.3815, "step": 18549 }, { "epoch": 2.69, "grad_norm": 9.222790718078613, "learning_rate": 5.6086800739831075e-08, "loss": 0.4016, "step": 18550 }, { "epoch": 2.69, "grad_norm": 8.24116039276123, "learning_rate": 5.6034548655063074e-08, "loss": 0.3065, "step": 18551 }, { "epoch": 2.69, "grad_norm": 8.761739730834961, "learning_rate": 5.598232021981564e-08, "loss": 0.3809, "step": 18552 }, { "epoch": 2.69, "grad_norm": 8.422538757324219, "learning_rate": 5.593011543539739e-08, "loss": 0.3414, "step": 18553 }, { "epoch": 2.69, "grad_norm": 9.352091789245605, "learning_rate": 5.5877934303116045e-08, "loss": 0.4129, "step": 18554 }, { "epoch": 2.69, "grad_norm": 8.58719253540039, "learning_rate": 5.5825776824279135e-08, "loss": 0.3925, "step": 18555 }, { "epoch": 2.69, "grad_norm": 8.367101669311523, "learning_rate": 5.577364300019316e-08, "loss": 0.3373, "step": 18556 }, { "epoch": 2.69, "grad_norm": 7.877068519592285, "learning_rate": 5.5721532832164406e-08, "loss": 0.3233, "step": 18557 }, { "epoch": 2.69, "grad_norm": 9.478561401367188, "learning_rate": 5.5669446321498506e-08, "loss": 0.3619, "step": 18558 }, { "epoch": 2.69, "grad_norm": 8.100852012634277, "learning_rate": 5.5617383469499954e-08, "loss": 0.3739, "step": 18559 }, { "epoch": 2.69, "grad_norm": 8.355454444885254, "learning_rate": 5.5565344277473725e-08, "loss": 0.3202, "step": 18560 }, { "epoch": 2.69, "grad_norm": 8.511483192443848, "learning_rate": 5.551332874672288e-08, "loss": 0.3453, "step": 18561 }, { "epoch": 2.69, "grad_norm": 9.046195983886719, "learning_rate": 5.5461336878551146e-08, "loss": 0.3645, "step": 18562 }, { "epoch": 2.69, "grad_norm": 8.92609691619873, "learning_rate": 5.540936867426094e-08, "loss": 0.3896, "step": 18563 }, { "epoch": 2.69, "grad_norm": 8.978860855102539, "learning_rate": 5.53574241351541e-08, "loss": 0.3501, "step": 18564 }, { "epoch": 2.69, "grad_norm": 10.286591529846191, "learning_rate": 5.530550326253236e-08, "loss": 0.3888, "step": 18565 }, { "epoch": 2.69, "grad_norm": 8.541319847106934, "learning_rate": 5.5253606057695914e-08, "loss": 0.3203, "step": 18566 }, { "epoch": 2.69, "grad_norm": 9.17060661315918, "learning_rate": 5.52017325219456e-08, "loss": 0.3514, "step": 18567 }, { "epoch": 2.69, "grad_norm": 8.151592254638672, "learning_rate": 5.514988265658049e-08, "loss": 0.3775, "step": 18568 }, { "epoch": 2.69, "grad_norm": 7.761280536651611, "learning_rate": 5.509805646290011e-08, "loss": 0.3201, "step": 18569 }, { "epoch": 2.69, "grad_norm": 8.842456817626953, "learning_rate": 5.5046253942202525e-08, "loss": 0.3331, "step": 18570 }, { "epoch": 2.69, "grad_norm": 9.106673240661621, "learning_rate": 5.49944750957857e-08, "loss": 0.4145, "step": 18571 }, { "epoch": 2.69, "grad_norm": 7.8171610832214355, "learning_rate": 5.49427199249467e-08, "loss": 0.3406, "step": 18572 }, { "epoch": 2.69, "grad_norm": 8.301841735839844, "learning_rate": 5.489098843098239e-08, "loss": 0.332, "step": 18573 }, { "epoch": 2.7, "grad_norm": 8.411213874816895, "learning_rate": 5.483928061518872e-08, "loss": 0.4074, "step": 18574 }, { "epoch": 2.7, "grad_norm": 8.339032173156738, "learning_rate": 5.478759647886111e-08, "loss": 0.322, "step": 18575 }, { "epoch": 2.7, "grad_norm": 8.849730491638184, "learning_rate": 5.473593602329452e-08, "loss": 0.3354, "step": 18576 }, { "epoch": 2.7, "grad_norm": 8.435964584350586, "learning_rate": 5.4684299249783016e-08, "loss": 0.3397, "step": 18577 }, { "epoch": 2.7, "grad_norm": 9.858022689819336, "learning_rate": 5.4632686159620576e-08, "loss": 0.3729, "step": 18578 }, { "epoch": 2.7, "grad_norm": 7.80118465423584, "learning_rate": 5.4581096754100167e-08, "loss": 0.2987, "step": 18579 }, { "epoch": 2.7, "grad_norm": 9.138784408569336, "learning_rate": 5.4529531034513964e-08, "loss": 0.3961, "step": 18580 }, { "epoch": 2.7, "grad_norm": 7.85172176361084, "learning_rate": 5.44779890021545e-08, "loss": 0.297, "step": 18581 }, { "epoch": 2.7, "grad_norm": 8.829801559448242, "learning_rate": 5.442647065831241e-08, "loss": 0.3591, "step": 18582 }, { "epoch": 2.7, "grad_norm": 8.60517692565918, "learning_rate": 5.4374976004278874e-08, "loss": 0.3676, "step": 18583 }, { "epoch": 2.7, "grad_norm": 8.84239387512207, "learning_rate": 5.432350504134364e-08, "loss": 0.3139, "step": 18584 }, { "epoch": 2.7, "grad_norm": 8.008318901062012, "learning_rate": 5.4272057770796686e-08, "loss": 0.364, "step": 18585 }, { "epoch": 2.7, "grad_norm": 8.129244804382324, "learning_rate": 5.422063419392653e-08, "loss": 0.3165, "step": 18586 }, { "epoch": 2.7, "grad_norm": 8.470246315002441, "learning_rate": 5.416923431202158e-08, "loss": 0.2942, "step": 18587 }, { "epoch": 2.7, "grad_norm": 8.912790298461914, "learning_rate": 5.411785812636982e-08, "loss": 0.3862, "step": 18588 }, { "epoch": 2.7, "grad_norm": 8.005194664001465, "learning_rate": 5.406650563825799e-08, "loss": 0.3243, "step": 18589 }, { "epoch": 2.7, "grad_norm": 8.721108436584473, "learning_rate": 5.401517684897316e-08, "loss": 0.3232, "step": 18590 }, { "epoch": 2.7, "grad_norm": 8.327864646911621, "learning_rate": 5.3963871759800996e-08, "loss": 0.3134, "step": 18591 }, { "epoch": 2.7, "grad_norm": 9.218982696533203, "learning_rate": 5.391259037202667e-08, "loss": 0.3403, "step": 18592 }, { "epoch": 2.7, "grad_norm": 9.157444953918457, "learning_rate": 5.3861332686935265e-08, "loss": 0.3969, "step": 18593 }, { "epoch": 2.7, "grad_norm": 7.578553199768066, "learning_rate": 5.381009870581088e-08, "loss": 0.3043, "step": 18594 }, { "epoch": 2.7, "grad_norm": 9.208477973937988, "learning_rate": 5.375888842993703e-08, "loss": 0.4078, "step": 18595 }, { "epoch": 2.7, "grad_norm": 8.71324634552002, "learning_rate": 5.370770186059681e-08, "loss": 0.3149, "step": 18596 }, { "epoch": 2.7, "grad_norm": 9.47230339050293, "learning_rate": 5.365653899907252e-08, "loss": 0.3821, "step": 18597 }, { "epoch": 2.7, "grad_norm": 9.349061965942383, "learning_rate": 5.3605399846645913e-08, "loss": 0.3368, "step": 18598 }, { "epoch": 2.7, "grad_norm": 8.570584297180176, "learning_rate": 5.355428440459842e-08, "loss": 0.3583, "step": 18599 }, { "epoch": 2.7, "grad_norm": 7.816945552825928, "learning_rate": 5.350319267421055e-08, "loss": 0.3673, "step": 18600 }, { "epoch": 2.7, "grad_norm": 9.049074172973633, "learning_rate": 5.345212465676208e-08, "loss": 0.3043, "step": 18601 }, { "epoch": 2.7, "grad_norm": 8.189122200012207, "learning_rate": 5.3401080353532966e-08, "loss": 0.3222, "step": 18602 }, { "epoch": 2.7, "grad_norm": 8.869939804077148, "learning_rate": 5.3350059765801317e-08, "loss": 0.3327, "step": 18603 }, { "epoch": 2.7, "grad_norm": 8.825970649719238, "learning_rate": 5.32990628948462e-08, "loss": 0.3366, "step": 18604 }, { "epoch": 2.7, "grad_norm": 8.298419952392578, "learning_rate": 5.3248089741944614e-08, "loss": 0.3639, "step": 18605 }, { "epoch": 2.7, "grad_norm": 8.158573150634766, "learning_rate": 5.3197140308373854e-08, "loss": 0.3326, "step": 18606 }, { "epoch": 2.7, "grad_norm": 8.084400177001953, "learning_rate": 5.314621459541024e-08, "loss": 0.3188, "step": 18607 }, { "epoch": 2.7, "grad_norm": 8.467435836791992, "learning_rate": 5.3095312604329864e-08, "loss": 0.3344, "step": 18608 }, { "epoch": 2.7, "grad_norm": 8.622721672058105, "learning_rate": 5.304443433640782e-08, "loss": 0.3646, "step": 18609 }, { "epoch": 2.7, "grad_norm": 8.123799324035645, "learning_rate": 5.299357979291885e-08, "loss": 0.3137, "step": 18610 }, { "epoch": 2.7, "grad_norm": 8.710617065429688, "learning_rate": 5.2942748975137064e-08, "loss": 0.3676, "step": 18611 }, { "epoch": 2.7, "grad_norm": 10.225655555725098, "learning_rate": 5.289194188433588e-08, "loss": 0.41, "step": 18612 }, { "epoch": 2.7, "grad_norm": 11.36227035522461, "learning_rate": 5.2841158521788165e-08, "loss": 0.3607, "step": 18613 }, { "epoch": 2.7, "grad_norm": 7.780554294586182, "learning_rate": 5.279039888876624e-08, "loss": 0.3275, "step": 18614 }, { "epoch": 2.7, "grad_norm": 8.751760482788086, "learning_rate": 5.273966298654187e-08, "loss": 0.3473, "step": 18615 }, { "epoch": 2.7, "grad_norm": 8.224146842956543, "learning_rate": 5.2688950816386025e-08, "loss": 0.3839, "step": 18616 }, { "epoch": 2.7, "grad_norm": 8.315309524536133, "learning_rate": 5.263826237956936e-08, "loss": 0.3759, "step": 18617 }, { "epoch": 2.7, "grad_norm": 9.345829963684082, "learning_rate": 5.258759767736176e-08, "loss": 0.3329, "step": 18618 }, { "epoch": 2.7, "grad_norm": 7.291614055633545, "learning_rate": 5.253695671103253e-08, "loss": 0.3082, "step": 18619 }, { "epoch": 2.7, "grad_norm": 8.351007461547852, "learning_rate": 5.2486339481850326e-08, "loss": 0.3129, "step": 18620 }, { "epoch": 2.7, "grad_norm": 9.916882514953613, "learning_rate": 5.243574599108347e-08, "loss": 0.4, "step": 18621 }, { "epoch": 2.7, "grad_norm": 9.270313262939453, "learning_rate": 5.238517623999905e-08, "loss": 0.3766, "step": 18622 }, { "epoch": 2.7, "grad_norm": 8.215594291687012, "learning_rate": 5.233463022986473e-08, "loss": 0.3352, "step": 18623 }, { "epoch": 2.7, "grad_norm": 10.54697322845459, "learning_rate": 5.228410796194605e-08, "loss": 0.3761, "step": 18624 }, { "epoch": 2.7, "grad_norm": 8.841157913208008, "learning_rate": 5.223360943750954e-08, "loss": 0.3679, "step": 18625 }, { "epoch": 2.7, "grad_norm": 7.937772274017334, "learning_rate": 5.218313465781976e-08, "loss": 0.2744, "step": 18626 }, { "epoch": 2.7, "grad_norm": 7.402825355529785, "learning_rate": 5.213268362414158e-08, "loss": 0.2793, "step": 18627 }, { "epoch": 2.7, "grad_norm": 7.925081729888916, "learning_rate": 5.208225633773888e-08, "loss": 0.3344, "step": 18628 }, { "epoch": 2.7, "grad_norm": 8.608501434326172, "learning_rate": 5.203185279987499e-08, "loss": 0.3879, "step": 18629 }, { "epoch": 2.7, "grad_norm": 8.212406158447266, "learning_rate": 5.198147301181277e-08, "loss": 0.339, "step": 18630 }, { "epoch": 2.7, "grad_norm": 9.090188980102539, "learning_rate": 5.1931116974814446e-08, "loss": 0.3745, "step": 18631 }, { "epoch": 2.7, "grad_norm": 8.980175018310547, "learning_rate": 5.188078469014145e-08, "loss": 0.3159, "step": 18632 }, { "epoch": 2.7, "grad_norm": 8.72479248046875, "learning_rate": 5.1830476159054895e-08, "loss": 0.3627, "step": 18633 }, { "epoch": 2.7, "grad_norm": 9.213598251342773, "learning_rate": 5.178019138281509e-08, "loss": 0.4084, "step": 18634 }, { "epoch": 2.7, "grad_norm": 8.385428428649902, "learning_rate": 5.172993036268214e-08, "loss": 0.3515, "step": 18635 }, { "epoch": 2.7, "grad_norm": 9.14714527130127, "learning_rate": 5.16796930999146e-08, "loss": 0.3775, "step": 18636 }, { "epoch": 2.7, "grad_norm": 8.45789909362793, "learning_rate": 5.162947959577191e-08, "loss": 0.3885, "step": 18637 }, { "epoch": 2.7, "grad_norm": 11.817176818847656, "learning_rate": 5.1579289851511274e-08, "loss": 0.3722, "step": 18638 }, { "epoch": 2.7, "grad_norm": 8.999642372131348, "learning_rate": 5.15291238683907e-08, "loss": 0.4009, "step": 18639 }, { "epoch": 2.7, "grad_norm": 9.56881332397461, "learning_rate": 5.147898164766695e-08, "loss": 0.3554, "step": 18640 }, { "epoch": 2.7, "grad_norm": 9.958773612976074, "learning_rate": 5.1428863190595916e-08, "loss": 0.3919, "step": 18641 }, { "epoch": 2.7, "grad_norm": 9.243330955505371, "learning_rate": 5.137876849843359e-08, "loss": 0.3734, "step": 18642 }, { "epoch": 2.71, "grad_norm": 9.927910804748535, "learning_rate": 5.132869757243474e-08, "loss": 0.39, "step": 18643 }, { "epoch": 2.71, "grad_norm": 9.1316556930542, "learning_rate": 5.127865041385415e-08, "loss": 0.3481, "step": 18644 }, { "epoch": 2.71, "grad_norm": 9.462597846984863, "learning_rate": 5.122862702394515e-08, "loss": 0.3738, "step": 18645 }, { "epoch": 2.71, "grad_norm": 9.408145904541016, "learning_rate": 5.1178627403961614e-08, "loss": 0.3916, "step": 18646 }, { "epoch": 2.71, "grad_norm": 9.125812530517578, "learning_rate": 5.1128651555155775e-08, "loss": 0.3163, "step": 18647 }, { "epoch": 2.71, "grad_norm": 9.362117767333984, "learning_rate": 5.107869947877985e-08, "loss": 0.3467, "step": 18648 }, { "epoch": 2.71, "grad_norm": 9.058355331420898, "learning_rate": 5.102877117608528e-08, "loss": 0.3079, "step": 18649 }, { "epoch": 2.71, "grad_norm": 12.033897399902344, "learning_rate": 5.097886664832285e-08, "loss": 0.3999, "step": 18650 }, { "epoch": 2.71, "grad_norm": 8.743393898010254, "learning_rate": 5.092898589674299e-08, "loss": 0.3644, "step": 18651 }, { "epoch": 2.71, "grad_norm": 7.027951240539551, "learning_rate": 5.087912892259527e-08, "loss": 0.2807, "step": 18652 }, { "epoch": 2.71, "grad_norm": 11.09521770477295, "learning_rate": 5.08292957271288e-08, "loss": 0.3399, "step": 18653 }, { "epoch": 2.71, "grad_norm": 8.395914077758789, "learning_rate": 5.0779486311592124e-08, "loss": 0.3279, "step": 18654 }, { "epoch": 2.71, "grad_norm": 8.528068542480469, "learning_rate": 5.072970067723303e-08, "loss": 0.3834, "step": 18655 }, { "epoch": 2.71, "grad_norm": 8.555380821228027, "learning_rate": 5.067993882529908e-08, "loss": 0.2939, "step": 18656 }, { "epoch": 2.71, "grad_norm": 8.378907203674316, "learning_rate": 5.06302007570365e-08, "loss": 0.3235, "step": 18657 }, { "epoch": 2.71, "grad_norm": 8.301084518432617, "learning_rate": 5.0580486473691956e-08, "loss": 0.298, "step": 18658 }, { "epoch": 2.71, "grad_norm": 8.470527648925781, "learning_rate": 5.0530795976510445e-08, "loss": 0.3386, "step": 18659 }, { "epoch": 2.71, "grad_norm": 10.542755126953125, "learning_rate": 5.048112926673731e-08, "loss": 0.4071, "step": 18660 }, { "epoch": 2.71, "grad_norm": 10.093377113342285, "learning_rate": 5.043148634561656e-08, "loss": 0.4053, "step": 18661 }, { "epoch": 2.71, "grad_norm": 9.298309326171875, "learning_rate": 5.038186721439208e-08, "loss": 0.3406, "step": 18662 }, { "epoch": 2.71, "grad_norm": 9.887901306152344, "learning_rate": 5.0332271874306886e-08, "loss": 0.3652, "step": 18663 }, { "epoch": 2.71, "grad_norm": 9.384093284606934, "learning_rate": 5.028270032660365e-08, "loss": 0.3383, "step": 18664 }, { "epoch": 2.71, "grad_norm": 10.253230094909668, "learning_rate": 5.023315257252414e-08, "loss": 0.4747, "step": 18665 }, { "epoch": 2.71, "grad_norm": 9.562138557434082, "learning_rate": 5.018362861330971e-08, "loss": 0.3955, "step": 18666 }, { "epoch": 2.71, "grad_norm": 8.407325744628906, "learning_rate": 5.013412845020137e-08, "loss": 0.3239, "step": 18667 }, { "epoch": 2.71, "grad_norm": 9.319782257080078, "learning_rate": 5.008465208443902e-08, "loss": 0.3476, "step": 18668 }, { "epoch": 2.71, "grad_norm": 9.167315483093262, "learning_rate": 5.003519951726221e-08, "loss": 0.389, "step": 18669 }, { "epoch": 2.71, "grad_norm": 7.519837856292725, "learning_rate": 4.998577074991006e-08, "loss": 0.2898, "step": 18670 }, { "epoch": 2.71, "grad_norm": 7.804259777069092, "learning_rate": 4.9936365783620704e-08, "loss": 0.2772, "step": 18671 }, { "epoch": 2.71, "grad_norm": 10.019879341125488, "learning_rate": 4.9886984619632034e-08, "loss": 0.3848, "step": 18672 }, { "epoch": 2.71, "grad_norm": 8.46795654296875, "learning_rate": 4.983762725918117e-08, "loss": 0.3272, "step": 18673 }, { "epoch": 2.71, "grad_norm": 8.869714736938477, "learning_rate": 4.978829370350468e-08, "loss": 0.3669, "step": 18674 }, { "epoch": 2.71, "grad_norm": 8.852944374084473, "learning_rate": 4.9738983953838684e-08, "loss": 0.3964, "step": 18675 }, { "epoch": 2.71, "grad_norm": 7.837883472442627, "learning_rate": 4.9689698011418315e-08, "loss": 0.3067, "step": 18676 }, { "epoch": 2.71, "grad_norm": 8.319050788879395, "learning_rate": 4.964043587747868e-08, "loss": 0.3677, "step": 18677 }, { "epoch": 2.71, "grad_norm": 9.594398498535156, "learning_rate": 4.9591197553253364e-08, "loss": 0.3738, "step": 18678 }, { "epoch": 2.71, "grad_norm": 8.03960132598877, "learning_rate": 4.95419830399767e-08, "loss": 0.354, "step": 18679 }, { "epoch": 2.71, "grad_norm": 8.78462028503418, "learning_rate": 4.949279233888104e-08, "loss": 0.347, "step": 18680 }, { "epoch": 2.71, "grad_norm": 15.593637466430664, "learning_rate": 4.94436254511994e-08, "loss": 0.3298, "step": 18681 }, { "epoch": 2.71, "grad_norm": 9.194332122802734, "learning_rate": 4.9394482378163015e-08, "loss": 0.3548, "step": 18682 }, { "epoch": 2.71, "grad_norm": 7.060298919677734, "learning_rate": 4.934536312100324e-08, "loss": 0.2855, "step": 18683 }, { "epoch": 2.71, "grad_norm": 9.510397911071777, "learning_rate": 4.929626768095085e-08, "loss": 0.3738, "step": 18684 }, { "epoch": 2.71, "grad_norm": 7.531859397888184, "learning_rate": 4.924719605923566e-08, "loss": 0.3188, "step": 18685 }, { "epoch": 2.71, "grad_norm": 8.615488052368164, "learning_rate": 4.9198148257087234e-08, "loss": 0.3763, "step": 18686 }, { "epoch": 2.71, "grad_norm": 8.728243827819824, "learning_rate": 4.914912427573426e-08, "loss": 0.2994, "step": 18687 }, { "epoch": 2.71, "grad_norm": 7.495064735412598, "learning_rate": 4.9100124116405074e-08, "loss": 0.284, "step": 18688 }, { "epoch": 2.71, "grad_norm": 8.221213340759277, "learning_rate": 4.905114778032715e-08, "loss": 0.3857, "step": 18689 }, { "epoch": 2.71, "grad_norm": 8.762171745300293, "learning_rate": 4.900219526872762e-08, "loss": 0.4214, "step": 18690 }, { "epoch": 2.71, "grad_norm": 8.297371864318848, "learning_rate": 4.8953266582833055e-08, "loss": 0.3399, "step": 18691 }, { "epoch": 2.71, "grad_norm": 8.632410049438477, "learning_rate": 4.89043617238688e-08, "loss": 0.4087, "step": 18692 }, { "epoch": 2.71, "grad_norm": 8.001693725585938, "learning_rate": 4.885548069306056e-08, "loss": 0.3456, "step": 18693 }, { "epoch": 2.71, "grad_norm": 8.420870780944824, "learning_rate": 4.880662349163278e-08, "loss": 0.3362, "step": 18694 }, { "epoch": 2.71, "grad_norm": 10.759905815124512, "learning_rate": 4.8757790120809495e-08, "loss": 0.4049, "step": 18695 }, { "epoch": 2.71, "grad_norm": 10.561958312988281, "learning_rate": 4.8708980581814275e-08, "loss": 0.3871, "step": 18696 }, { "epoch": 2.71, "grad_norm": 8.59697151184082, "learning_rate": 4.86601948758697e-08, "loss": 0.3037, "step": 18697 }, { "epoch": 2.71, "grad_norm": 7.7570109367370605, "learning_rate": 4.8611433004198345e-08, "loss": 0.3596, "step": 18698 }, { "epoch": 2.71, "grad_norm": 8.07741641998291, "learning_rate": 4.856269496802146e-08, "loss": 0.3365, "step": 18699 }, { "epoch": 2.71, "grad_norm": 8.141212463378906, "learning_rate": 4.851398076856061e-08, "loss": 0.3061, "step": 18700 }, { "epoch": 2.71, "grad_norm": 8.810562133789062, "learning_rate": 4.846529040703573e-08, "loss": 0.3556, "step": 18701 }, { "epoch": 2.71, "grad_norm": 8.62635612487793, "learning_rate": 4.841662388466716e-08, "loss": 0.3376, "step": 18702 }, { "epoch": 2.71, "grad_norm": 7.603636741638184, "learning_rate": 4.8367981202673826e-08, "loss": 0.3156, "step": 18703 }, { "epoch": 2.71, "grad_norm": 7.946206569671631, "learning_rate": 4.831936236227441e-08, "loss": 0.3538, "step": 18704 }, { "epoch": 2.71, "grad_norm": 9.331710815429688, "learning_rate": 4.827076736468705e-08, "loss": 0.378, "step": 18705 }, { "epoch": 2.71, "grad_norm": 8.071420669555664, "learning_rate": 4.822219621112922e-08, "loss": 0.3078, "step": 18706 }, { "epoch": 2.71, "grad_norm": 9.31425666809082, "learning_rate": 4.8173648902817834e-08, "loss": 0.3755, "step": 18707 }, { "epoch": 2.71, "grad_norm": 7.488035202026367, "learning_rate": 4.812512544096914e-08, "loss": 0.3167, "step": 18708 }, { "epoch": 2.71, "grad_norm": 9.486693382263184, "learning_rate": 4.8076625826798724e-08, "loss": 0.3513, "step": 18709 }, { "epoch": 2.71, "grad_norm": 8.585216522216797, "learning_rate": 4.8028150061521723e-08, "loss": 0.3387, "step": 18710 }, { "epoch": 2.71, "grad_norm": 8.356582641601562, "learning_rate": 4.797969814635272e-08, "loss": 0.3563, "step": 18711 }, { "epoch": 2.72, "grad_norm": 8.58894157409668, "learning_rate": 4.7931270082505636e-08, "loss": 0.3225, "step": 18712 }, { "epoch": 2.72, "grad_norm": 8.183737754821777, "learning_rate": 4.788286587119328e-08, "loss": 0.3084, "step": 18713 }, { "epoch": 2.72, "grad_norm": 7.856688976287842, "learning_rate": 4.783448551362901e-08, "loss": 0.2949, "step": 18714 }, { "epoch": 2.72, "grad_norm": 8.190454483032227, "learning_rate": 4.77861290110243e-08, "loss": 0.3254, "step": 18715 }, { "epoch": 2.72, "grad_norm": 9.888465881347656, "learning_rate": 4.77377963645913e-08, "loss": 0.3165, "step": 18716 }, { "epoch": 2.72, "grad_norm": 10.183000564575195, "learning_rate": 4.768948757554026e-08, "loss": 0.3639, "step": 18717 }, { "epoch": 2.72, "grad_norm": 8.513789176940918, "learning_rate": 4.764120264508176e-08, "loss": 0.3994, "step": 18718 }, { "epoch": 2.72, "grad_norm": 8.305841445922852, "learning_rate": 4.759294157442573e-08, "loss": 0.2996, "step": 18719 }, { "epoch": 2.72, "grad_norm": 8.480009078979492, "learning_rate": 4.754470436478075e-08, "loss": 0.3738, "step": 18720 }, { "epoch": 2.72, "grad_norm": 7.9613165855407715, "learning_rate": 4.7496491017355975e-08, "loss": 0.3527, "step": 18721 }, { "epoch": 2.72, "grad_norm": 8.358574867248535, "learning_rate": 4.744830153335877e-08, "loss": 0.3455, "step": 18722 }, { "epoch": 2.72, "grad_norm": 8.94920825958252, "learning_rate": 4.740013591399672e-08, "loss": 0.358, "step": 18723 }, { "epoch": 2.72, "grad_norm": 9.145979881286621, "learning_rate": 4.7351994160476305e-08, "loss": 0.3989, "step": 18724 }, { "epoch": 2.72, "grad_norm": 9.395719528198242, "learning_rate": 4.7303876274003894e-08, "loss": 0.3864, "step": 18725 }, { "epoch": 2.72, "grad_norm": 8.491642951965332, "learning_rate": 4.725578225578486e-08, "loss": 0.3442, "step": 18726 }, { "epoch": 2.72, "grad_norm": 9.148858070373535, "learning_rate": 4.7207712107024124e-08, "loss": 0.3719, "step": 18727 }, { "epoch": 2.72, "grad_norm": 9.147992134094238, "learning_rate": 4.7159665828926165e-08, "loss": 0.3273, "step": 18728 }, { "epoch": 2.72, "grad_norm": 9.20920467376709, "learning_rate": 4.711164342269447e-08, "loss": 0.3857, "step": 18729 }, { "epoch": 2.72, "grad_norm": 9.204802513122559, "learning_rate": 4.706364488953229e-08, "loss": 0.3309, "step": 18730 }, { "epoch": 2.72, "grad_norm": 9.995377540588379, "learning_rate": 4.701567023064224e-08, "loss": 0.402, "step": 18731 }, { "epoch": 2.72, "grad_norm": 7.645411968231201, "learning_rate": 4.6967719447225886e-08, "loss": 0.3463, "step": 18732 }, { "epoch": 2.72, "grad_norm": 9.520272254943848, "learning_rate": 4.691979254048506e-08, "loss": 0.4302, "step": 18733 }, { "epoch": 2.72, "grad_norm": 8.265352249145508, "learning_rate": 4.687188951161991e-08, "loss": 0.341, "step": 18734 }, { "epoch": 2.72, "grad_norm": 8.770164489746094, "learning_rate": 4.682401036183126e-08, "loss": 0.3798, "step": 18735 }, { "epoch": 2.72, "grad_norm": 8.014655113220215, "learning_rate": 4.677615509231802e-08, "loss": 0.2994, "step": 18736 }, { "epoch": 2.72, "grad_norm": 9.696331024169922, "learning_rate": 4.672832370427959e-08, "loss": 0.341, "step": 18737 }, { "epoch": 2.72, "grad_norm": 9.370766639709473, "learning_rate": 4.6680516198914e-08, "loss": 0.3607, "step": 18738 }, { "epoch": 2.72, "grad_norm": 8.684799194335938, "learning_rate": 4.6632732577419176e-08, "loss": 0.2935, "step": 18739 }, { "epoch": 2.72, "grad_norm": 8.528502464294434, "learning_rate": 4.6584972840992055e-08, "loss": 0.3795, "step": 18740 }, { "epoch": 2.72, "grad_norm": 8.662262916564941, "learning_rate": 4.653723699082934e-08, "loss": 0.3547, "step": 18741 }, { "epoch": 2.72, "grad_norm": 9.659000396728516, "learning_rate": 4.648952502812697e-08, "loss": 0.3699, "step": 18742 }, { "epoch": 2.72, "grad_norm": 7.1966681480407715, "learning_rate": 4.644183695408033e-08, "loss": 0.3146, "step": 18743 }, { "epoch": 2.72, "grad_norm": 9.115507125854492, "learning_rate": 4.6394172769884e-08, "loss": 0.2994, "step": 18744 }, { "epoch": 2.72, "grad_norm": 9.338276863098145, "learning_rate": 4.634653247673215e-08, "loss": 0.3641, "step": 18745 }, { "epoch": 2.72, "grad_norm": 8.775442123413086, "learning_rate": 4.6298916075818484e-08, "loss": 0.3759, "step": 18746 }, { "epoch": 2.72, "grad_norm": 8.167356491088867, "learning_rate": 4.625132356833594e-08, "loss": 0.3072, "step": 18747 }, { "epoch": 2.72, "grad_norm": 8.308492660522461, "learning_rate": 4.620375495547668e-08, "loss": 0.3138, "step": 18748 }, { "epoch": 2.72, "grad_norm": 10.265140533447266, "learning_rate": 4.615621023843275e-08, "loss": 0.3929, "step": 18749 }, { "epoch": 2.72, "grad_norm": 9.516119956970215, "learning_rate": 4.6108689418394965e-08, "loss": 0.3862, "step": 18750 }, { "epoch": 2.72, "grad_norm": 9.6735200881958, "learning_rate": 4.606119249655416e-08, "loss": 0.4226, "step": 18751 }, { "epoch": 2.72, "grad_norm": 9.9754056930542, "learning_rate": 4.6013719474100266e-08, "loss": 0.3699, "step": 18752 }, { "epoch": 2.72, "grad_norm": 9.306021690368652, "learning_rate": 4.5966270352222335e-08, "loss": 0.41, "step": 18753 }, { "epoch": 2.72, "grad_norm": 8.575979232788086, "learning_rate": 4.591884513210964e-08, "loss": 0.3272, "step": 18754 }, { "epoch": 2.72, "grad_norm": 9.279463768005371, "learning_rate": 4.587144381494978e-08, "loss": 0.3527, "step": 18755 }, { "epoch": 2.72, "grad_norm": 9.678778648376465, "learning_rate": 4.5824066401930813e-08, "loss": 0.3466, "step": 18756 }, { "epoch": 2.72, "grad_norm": 8.883528709411621, "learning_rate": 4.577671289423923e-08, "loss": 0.36, "step": 18757 }, { "epoch": 2.72, "grad_norm": 9.440103530883789, "learning_rate": 4.5729383293061976e-08, "loss": 0.373, "step": 18758 }, { "epoch": 2.72, "grad_norm": 10.972790718078613, "learning_rate": 4.568207759958442e-08, "loss": 0.3761, "step": 18759 }, { "epoch": 2.72, "grad_norm": 9.06555461883545, "learning_rate": 4.563479581499163e-08, "loss": 0.3701, "step": 18760 }, { "epoch": 2.72, "grad_norm": 8.72368335723877, "learning_rate": 4.558753794046855e-08, "loss": 0.3111, "step": 18761 }, { "epoch": 2.72, "grad_norm": 9.732752799987793, "learning_rate": 4.554030397719877e-08, "loss": 0.3911, "step": 18762 }, { "epoch": 2.72, "grad_norm": 8.456913948059082, "learning_rate": 4.549309392636591e-08, "loss": 0.3403, "step": 18763 }, { "epoch": 2.72, "grad_norm": 7.197829723358154, "learning_rate": 4.5445907789152693e-08, "loss": 0.3299, "step": 18764 }, { "epoch": 2.72, "grad_norm": 8.45669937133789, "learning_rate": 4.539874556674128e-08, "loss": 0.3701, "step": 18765 }, { "epoch": 2.72, "grad_norm": 9.685820579528809, "learning_rate": 4.535160726031329e-08, "loss": 0.3407, "step": 18766 }, { "epoch": 2.72, "grad_norm": 9.690006256103516, "learning_rate": 4.530449287104954e-08, "loss": 0.3729, "step": 18767 }, { "epoch": 2.72, "grad_norm": 9.699241638183594, "learning_rate": 4.525740240013065e-08, "loss": 0.3597, "step": 18768 }, { "epoch": 2.72, "grad_norm": 7.920623779296875, "learning_rate": 4.5210335848736016e-08, "loss": 0.3461, "step": 18769 }, { "epoch": 2.72, "grad_norm": 8.49425983428955, "learning_rate": 4.516329321804513e-08, "loss": 0.328, "step": 18770 }, { "epoch": 2.72, "grad_norm": 8.724180221557617, "learning_rate": 4.51162745092365e-08, "loss": 0.3666, "step": 18771 }, { "epoch": 2.72, "grad_norm": 8.162936210632324, "learning_rate": 4.506927972348806e-08, "loss": 0.3791, "step": 18772 }, { "epoch": 2.72, "grad_norm": 7.509320259094238, "learning_rate": 4.5022308861977444e-08, "loss": 0.3335, "step": 18773 }, { "epoch": 2.72, "grad_norm": 9.353387832641602, "learning_rate": 4.4975361925880806e-08, "loss": 0.4015, "step": 18774 }, { "epoch": 2.72, "grad_norm": 9.270820617675781, "learning_rate": 4.49284389163751e-08, "loss": 0.3473, "step": 18775 }, { "epoch": 2.72, "grad_norm": 9.218633651733398, "learning_rate": 4.4881539834635277e-08, "loss": 0.3565, "step": 18776 }, { "epoch": 2.72, "grad_norm": 8.716022491455078, "learning_rate": 4.483466468183672e-08, "loss": 0.3186, "step": 18777 }, { "epoch": 2.72, "grad_norm": 7.3525471687316895, "learning_rate": 4.478781345915361e-08, "loss": 0.3592, "step": 18778 }, { "epoch": 2.72, "grad_norm": 10.362000465393066, "learning_rate": 4.4740986167759784e-08, "loss": 0.3754, "step": 18779 }, { "epoch": 2.72, "grad_norm": 8.094682693481445, "learning_rate": 4.469418280882842e-08, "loss": 0.3712, "step": 18780 }, { "epoch": 2.73, "grad_norm": 7.534519195556641, "learning_rate": 4.4647403383532014e-08, "loss": 0.3229, "step": 18781 }, { "epoch": 2.73, "grad_norm": 8.237847328186035, "learning_rate": 4.4600647893042745e-08, "loss": 0.3844, "step": 18782 }, { "epoch": 2.73, "grad_norm": 8.01094913482666, "learning_rate": 4.455391633853178e-08, "loss": 0.3437, "step": 18783 }, { "epoch": 2.73, "grad_norm": 9.900262832641602, "learning_rate": 4.450720872117009e-08, "loss": 0.3967, "step": 18784 }, { "epoch": 2.73, "grad_norm": 8.04743480682373, "learning_rate": 4.4460525042127714e-08, "loss": 0.32, "step": 18785 }, { "epoch": 2.73, "grad_norm": 7.745029926300049, "learning_rate": 4.4413865302574294e-08, "loss": 0.303, "step": 18786 }, { "epoch": 2.73, "grad_norm": 8.518274307250977, "learning_rate": 4.4367229503678996e-08, "loss": 0.2875, "step": 18787 }, { "epoch": 2.73, "grad_norm": 7.719298362731934, "learning_rate": 4.432061764660977e-08, "loss": 0.323, "step": 18788 }, { "epoch": 2.73, "grad_norm": 8.964197158813477, "learning_rate": 4.42740297325348e-08, "loss": 0.3604, "step": 18789 }, { "epoch": 2.73, "grad_norm": 8.355830192565918, "learning_rate": 4.422746576262093e-08, "loss": 0.3279, "step": 18790 }, { "epoch": 2.73, "grad_norm": 8.351435661315918, "learning_rate": 4.418092573803522e-08, "loss": 0.3755, "step": 18791 }, { "epoch": 2.73, "grad_norm": 9.058904647827148, "learning_rate": 4.413440965994319e-08, "loss": 0.3758, "step": 18792 }, { "epoch": 2.73, "grad_norm": 8.131802558898926, "learning_rate": 4.408791752951058e-08, "loss": 0.349, "step": 18793 }, { "epoch": 2.73, "grad_norm": 9.062470436096191, "learning_rate": 4.4041449347901774e-08, "loss": 0.3747, "step": 18794 }, { "epoch": 2.73, "grad_norm": 7.947388172149658, "learning_rate": 4.3995005116281295e-08, "loss": 0.3153, "step": 18795 }, { "epoch": 2.73, "grad_norm": 8.518327713012695, "learning_rate": 4.394858483581265e-08, "loss": 0.3278, "step": 18796 }, { "epoch": 2.73, "grad_norm": 8.958513259887695, "learning_rate": 4.390218850765859e-08, "loss": 0.3699, "step": 18797 }, { "epoch": 2.73, "grad_norm": 7.922127723693848, "learning_rate": 4.385581613298195e-08, "loss": 0.3265, "step": 18798 }, { "epoch": 2.73, "grad_norm": 7.919425010681152, "learning_rate": 4.380946771294425e-08, "loss": 0.3451, "step": 18799 }, { "epoch": 2.73, "grad_norm": 9.662687301635742, "learning_rate": 4.376314324870656e-08, "loss": 0.3897, "step": 18800 }, { "epoch": 2.73, "grad_norm": 8.247081756591797, "learning_rate": 4.3716842741429726e-08, "loss": 0.2903, "step": 18801 }, { "epoch": 2.73, "grad_norm": 8.54462718963623, "learning_rate": 4.3670566192273496e-08, "loss": 0.3376, "step": 18802 }, { "epoch": 2.73, "grad_norm": 9.57942008972168, "learning_rate": 4.3624313602397376e-08, "loss": 0.3728, "step": 18803 }, { "epoch": 2.73, "grad_norm": 8.881051063537598, "learning_rate": 4.357808497296011e-08, "loss": 0.3549, "step": 18804 }, { "epoch": 2.73, "grad_norm": 8.871337890625, "learning_rate": 4.3531880305120005e-08, "loss": 0.3372, "step": 18805 }, { "epoch": 2.73, "grad_norm": 8.044930458068848, "learning_rate": 4.348569960003445e-08, "loss": 0.2919, "step": 18806 }, { "epoch": 2.73, "grad_norm": 8.384682655334473, "learning_rate": 4.3439542858860535e-08, "loss": 0.3287, "step": 18807 }, { "epoch": 2.73, "grad_norm": 7.578100681304932, "learning_rate": 4.3393410082754763e-08, "loss": 0.224, "step": 18808 }, { "epoch": 2.73, "grad_norm": 8.878170013427734, "learning_rate": 4.334730127287245e-08, "loss": 0.3808, "step": 18809 }, { "epoch": 2.73, "grad_norm": 9.428571701049805, "learning_rate": 4.3301216430369435e-08, "loss": 0.441, "step": 18810 }, { "epoch": 2.73, "grad_norm": 8.240715980529785, "learning_rate": 4.325515555639969e-08, "loss": 0.2663, "step": 18811 }, { "epoch": 2.73, "grad_norm": 8.900883674621582, "learning_rate": 4.3209118652117846e-08, "loss": 0.3731, "step": 18812 }, { "epoch": 2.73, "grad_norm": 8.578292846679688, "learning_rate": 4.316310571867654e-08, "loss": 0.3797, "step": 18813 }, { "epoch": 2.73, "grad_norm": 8.732909202575684, "learning_rate": 4.3117116757229176e-08, "loss": 0.365, "step": 18814 }, { "epoch": 2.73, "grad_norm": 10.359347343444824, "learning_rate": 4.3071151768927617e-08, "loss": 0.3965, "step": 18815 }, { "epoch": 2.73, "grad_norm": 9.43513298034668, "learning_rate": 4.302521075492349e-08, "loss": 0.4081, "step": 18816 }, { "epoch": 2.73, "grad_norm": 11.367237091064453, "learning_rate": 4.297929371636766e-08, "loss": 0.3735, "step": 18817 }, { "epoch": 2.73, "grad_norm": 9.312605857849121, "learning_rate": 4.293340065441076e-08, "loss": 0.3541, "step": 18818 }, { "epoch": 2.73, "grad_norm": 10.029419898986816, "learning_rate": 4.2887531570202416e-08, "loss": 0.396, "step": 18819 }, { "epoch": 2.73, "grad_norm": 8.883339881896973, "learning_rate": 4.2841686464891836e-08, "loss": 0.2728, "step": 18820 }, { "epoch": 2.73, "grad_norm": 9.332138061523438, "learning_rate": 4.2795865339627536e-08, "loss": 0.3579, "step": 18821 }, { "epoch": 2.73, "grad_norm": 7.585580825805664, "learning_rate": 4.27500681955576e-08, "loss": 0.3099, "step": 18822 }, { "epoch": 2.73, "grad_norm": 8.489541053771973, "learning_rate": 4.270429503382933e-08, "loss": 0.3121, "step": 18823 }, { "epoch": 2.73, "grad_norm": 8.779521942138672, "learning_rate": 4.265854585558948e-08, "loss": 0.3338, "step": 18824 }, { "epoch": 2.73, "grad_norm": 8.779335021972656, "learning_rate": 4.261282066198424e-08, "loss": 0.3869, "step": 18825 }, { "epoch": 2.73, "grad_norm": 8.424666404724121, "learning_rate": 4.256711945415914e-08, "loss": 0.3717, "step": 18826 }, { "epoch": 2.73, "grad_norm": 9.584515571594238, "learning_rate": 4.2521442233259264e-08, "loss": 0.3871, "step": 18827 }, { "epoch": 2.73, "grad_norm": 8.592950820922852, "learning_rate": 4.247578900042892e-08, "loss": 0.3213, "step": 18828 }, { "epoch": 2.73, "grad_norm": 8.294266700744629, "learning_rate": 4.2430159756811965e-08, "loss": 0.3533, "step": 18829 }, { "epoch": 2.73, "grad_norm": 7.513473033905029, "learning_rate": 4.2384554503551276e-08, "loss": 0.305, "step": 18830 }, { "epoch": 2.73, "grad_norm": 8.301705360412598, "learning_rate": 4.233897324178981e-08, "loss": 0.292, "step": 18831 }, { "epoch": 2.73, "grad_norm": 9.270730972290039, "learning_rate": 4.229341597266911e-08, "loss": 0.3643, "step": 18832 }, { "epoch": 2.73, "grad_norm": 9.888044357299805, "learning_rate": 4.224788269733093e-08, "loss": 0.3834, "step": 18833 }, { "epoch": 2.73, "grad_norm": 8.645017623901367, "learning_rate": 4.22023734169159e-08, "loss": 0.2878, "step": 18834 }, { "epoch": 2.73, "grad_norm": 8.557389259338379, "learning_rate": 4.215688813256402e-08, "loss": 0.3493, "step": 18835 }, { "epoch": 2.73, "grad_norm": 8.819037437438965, "learning_rate": 4.2111426845415024e-08, "loss": 0.3527, "step": 18836 }, { "epoch": 2.73, "grad_norm": 8.84315299987793, "learning_rate": 4.2065989556607784e-08, "loss": 0.3777, "step": 18837 }, { "epoch": 2.73, "grad_norm": 9.880071640014648, "learning_rate": 4.202057626728073e-08, "loss": 0.3852, "step": 18838 }, { "epoch": 2.73, "grad_norm": 8.592561721801758, "learning_rate": 4.197518697857161e-08, "loss": 0.3374, "step": 18839 }, { "epoch": 2.73, "grad_norm": 8.94411849975586, "learning_rate": 4.192982169161752e-08, "loss": 0.3179, "step": 18840 }, { "epoch": 2.73, "grad_norm": 8.802452087402344, "learning_rate": 4.188448040755499e-08, "loss": 0.3532, "step": 18841 }, { "epoch": 2.73, "grad_norm": 9.06230354309082, "learning_rate": 4.183916312752012e-08, "loss": 0.3048, "step": 18842 }, { "epoch": 2.73, "grad_norm": 7.692081928253174, "learning_rate": 4.179386985264821e-08, "loss": 0.3331, "step": 18843 }, { "epoch": 2.73, "grad_norm": 8.591309547424316, "learning_rate": 4.174860058407381e-08, "loss": 0.3322, "step": 18844 }, { "epoch": 2.73, "grad_norm": 9.197823524475098, "learning_rate": 4.1703355322931454e-08, "loss": 0.3232, "step": 18845 }, { "epoch": 2.73, "grad_norm": 9.31045913696289, "learning_rate": 4.1658134070354124e-08, "loss": 0.3593, "step": 18846 }, { "epoch": 2.73, "grad_norm": 8.261507987976074, "learning_rate": 4.161293682747546e-08, "loss": 0.3357, "step": 18847 }, { "epoch": 2.73, "grad_norm": 8.617900848388672, "learning_rate": 4.156776359542713e-08, "loss": 0.3847, "step": 18848 }, { "epoch": 2.73, "grad_norm": 7.528129577636719, "learning_rate": 4.152261437534144e-08, "loss": 0.35, "step": 18849 }, { "epoch": 2.74, "grad_norm": 9.171977996826172, "learning_rate": 4.1477489168349256e-08, "loss": 0.3749, "step": 18850 }, { "epoch": 2.74, "grad_norm": 7.942123889923096, "learning_rate": 4.143238797558102e-08, "loss": 0.3409, "step": 18851 }, { "epoch": 2.74, "grad_norm": 9.337857246398926, "learning_rate": 4.138731079816693e-08, "loss": 0.3825, "step": 18852 }, { "epoch": 2.74, "grad_norm": 8.622566223144531, "learning_rate": 4.1342257637235975e-08, "loss": 0.3074, "step": 18853 }, { "epoch": 2.74, "grad_norm": 9.745951652526855, "learning_rate": 4.129722849391737e-08, "loss": 0.3634, "step": 18854 }, { "epoch": 2.74, "grad_norm": 7.403624057769775, "learning_rate": 4.125222336933887e-08, "loss": 0.3247, "step": 18855 }, { "epoch": 2.74, "grad_norm": 8.814254760742188, "learning_rate": 4.1207242264628125e-08, "loss": 0.3361, "step": 18856 }, { "epoch": 2.74, "grad_norm": 7.686147689819336, "learning_rate": 4.1162285180912026e-08, "loss": 0.3821, "step": 18857 }, { "epoch": 2.74, "grad_norm": 9.687597274780273, "learning_rate": 4.111735211931688e-08, "loss": 0.3942, "step": 18858 }, { "epoch": 2.74, "grad_norm": 9.829883575439453, "learning_rate": 4.1072443080968466e-08, "loss": 0.3871, "step": 18859 }, { "epoch": 2.74, "grad_norm": 9.430625915527344, "learning_rate": 4.102755806699199e-08, "loss": 0.3316, "step": 18860 }, { "epoch": 2.74, "grad_norm": 9.0938138961792, "learning_rate": 4.0982697078511765e-08, "loss": 0.321, "step": 18861 }, { "epoch": 2.74, "grad_norm": 8.925982475280762, "learning_rate": 4.09378601166519e-08, "loss": 0.3808, "step": 18862 }, { "epoch": 2.74, "grad_norm": 9.91057014465332, "learning_rate": 4.089304718253561e-08, "loss": 0.3795, "step": 18863 }, { "epoch": 2.74, "grad_norm": 8.655130386352539, "learning_rate": 4.084825827728578e-08, "loss": 0.3478, "step": 18864 }, { "epoch": 2.74, "grad_norm": 8.410240173339844, "learning_rate": 4.0803493402024045e-08, "loss": 0.3694, "step": 18865 }, { "epoch": 2.74, "grad_norm": 8.536925315856934, "learning_rate": 4.075875255787264e-08, "loss": 0.3471, "step": 18866 }, { "epoch": 2.74, "grad_norm": 7.96197509765625, "learning_rate": 4.071403574595178e-08, "loss": 0.3116, "step": 18867 }, { "epoch": 2.74, "grad_norm": 8.801061630249023, "learning_rate": 4.0669342967382335e-08, "loss": 0.3844, "step": 18868 }, { "epoch": 2.74, "grad_norm": 8.226198196411133, "learning_rate": 4.0624674223283526e-08, "loss": 0.3507, "step": 18869 }, { "epoch": 2.74, "grad_norm": 8.385509490966797, "learning_rate": 4.0580029514774794e-08, "loss": 0.3356, "step": 18870 }, { "epoch": 2.74, "grad_norm": 7.900400161743164, "learning_rate": 4.053540884297457e-08, "loss": 0.2782, "step": 18871 }, { "epoch": 2.74, "grad_norm": 9.378908157348633, "learning_rate": 4.0490812209000636e-08, "loss": 0.3485, "step": 18872 }, { "epoch": 2.74, "grad_norm": 7.0933098793029785, "learning_rate": 4.0446239613970314e-08, "loss": 0.3448, "step": 18873 }, { "epoch": 2.74, "grad_norm": 8.25693416595459, "learning_rate": 4.040169105900049e-08, "loss": 0.3757, "step": 18874 }, { "epoch": 2.74, "grad_norm": 8.43409252166748, "learning_rate": 4.035716654520693e-08, "loss": 0.3195, "step": 18875 }, { "epoch": 2.74, "grad_norm": 8.727242469787598, "learning_rate": 4.0312666073705423e-08, "loss": 0.3444, "step": 18876 }, { "epoch": 2.74, "grad_norm": 8.77546215057373, "learning_rate": 4.026818964561074e-08, "loss": 0.3926, "step": 18877 }, { "epoch": 2.74, "grad_norm": 8.639348030090332, "learning_rate": 4.022373726203721e-08, "loss": 0.393, "step": 18878 }, { "epoch": 2.74, "grad_norm": 8.159201622009277, "learning_rate": 4.017930892409838e-08, "loss": 0.3569, "step": 18879 }, { "epoch": 2.74, "grad_norm": 8.292564392089844, "learning_rate": 4.0134904632907476e-08, "loss": 0.3449, "step": 18880 }, { "epoch": 2.74, "grad_norm": 7.943549156188965, "learning_rate": 4.009052438957683e-08, "loss": 0.3239, "step": 18881 }, { "epoch": 2.74, "grad_norm": 11.355347633361816, "learning_rate": 4.0046168195218443e-08, "loss": 0.3965, "step": 18882 }, { "epoch": 2.74, "grad_norm": 9.079426765441895, "learning_rate": 4.000183605094365e-08, "loss": 0.3817, "step": 18883 }, { "epoch": 2.74, "grad_norm": 10.537612915039062, "learning_rate": 3.995752795786289e-08, "loss": 0.4133, "step": 18884 }, { "epoch": 2.74, "grad_norm": 11.29403305053711, "learning_rate": 3.991324391708661e-08, "loss": 0.4211, "step": 18885 }, { "epoch": 2.74, "grad_norm": 8.669260025024414, "learning_rate": 3.986898392972371e-08, "loss": 0.3619, "step": 18886 }, { "epoch": 2.74, "grad_norm": 8.647984504699707, "learning_rate": 3.982474799688362e-08, "loss": 0.3688, "step": 18887 }, { "epoch": 2.74, "grad_norm": 8.645835876464844, "learning_rate": 3.9780536119674024e-08, "loss": 0.3229, "step": 18888 }, { "epoch": 2.74, "grad_norm": 8.47610855102539, "learning_rate": 3.973634829920325e-08, "loss": 0.3402, "step": 18889 }, { "epoch": 2.74, "grad_norm": 10.043844223022461, "learning_rate": 3.969218453657786e-08, "loss": 0.3236, "step": 18890 }, { "epoch": 2.74, "grad_norm": 8.542864799499512, "learning_rate": 3.9648044832904517e-08, "loss": 0.3193, "step": 18891 }, { "epoch": 2.74, "grad_norm": 8.994388580322266, "learning_rate": 3.96039291892889e-08, "loss": 0.3437, "step": 18892 }, { "epoch": 2.74, "grad_norm": 8.489389419555664, "learning_rate": 3.955983760683634e-08, "loss": 0.328, "step": 18893 }, { "epoch": 2.74, "grad_norm": 9.672402381896973, "learning_rate": 3.9515770086651637e-08, "loss": 0.3965, "step": 18894 }, { "epoch": 2.74, "grad_norm": 8.657026290893555, "learning_rate": 3.9471726629838555e-08, "loss": 0.3663, "step": 18895 }, { "epoch": 2.74, "grad_norm": 7.432692527770996, "learning_rate": 3.9427707237500776e-08, "loss": 0.2838, "step": 18896 }, { "epoch": 2.74, "grad_norm": 7.815023899078369, "learning_rate": 3.938371191074097e-08, "loss": 0.3256, "step": 18897 }, { "epoch": 2.74, "grad_norm": 9.804412841796875, "learning_rate": 3.9339740650661365e-08, "loss": 0.4172, "step": 18898 }, { "epoch": 2.74, "grad_norm": 8.910189628601074, "learning_rate": 3.9295793458363866e-08, "loss": 0.315, "step": 18899 }, { "epoch": 2.74, "grad_norm": 8.724754333496094, "learning_rate": 3.925187033494903e-08, "loss": 0.3266, "step": 18900 }, { "epoch": 2.74, "grad_norm": 10.16861629486084, "learning_rate": 3.920797128151754e-08, "loss": 0.4084, "step": 18901 }, { "epoch": 2.74, "grad_norm": 10.436691284179688, "learning_rate": 3.916409629916928e-08, "loss": 0.3824, "step": 18902 }, { "epoch": 2.74, "grad_norm": 8.307661056518555, "learning_rate": 3.912024538900349e-08, "loss": 0.3061, "step": 18903 }, { "epoch": 2.74, "grad_norm": 8.620948791503906, "learning_rate": 3.9076418552118514e-08, "loss": 0.3724, "step": 18904 }, { "epoch": 2.74, "grad_norm": 9.75217056274414, "learning_rate": 3.90326157896127e-08, "loss": 0.3818, "step": 18905 }, { "epoch": 2.74, "grad_norm": 9.05549144744873, "learning_rate": 3.898883710258327e-08, "loss": 0.3341, "step": 18906 }, { "epoch": 2.74, "grad_norm": 9.335489273071289, "learning_rate": 3.894508249212691e-08, "loss": 0.3676, "step": 18907 }, { "epoch": 2.74, "grad_norm": 7.731400966644287, "learning_rate": 3.890135195934019e-08, "loss": 0.3386, "step": 18908 }, { "epoch": 2.74, "grad_norm": 8.979889869689941, "learning_rate": 3.885764550531812e-08, "loss": 0.375, "step": 18909 }, { "epoch": 2.74, "grad_norm": 8.385330200195312, "learning_rate": 3.8813963131156366e-08, "loss": 0.3742, "step": 18910 }, { "epoch": 2.74, "grad_norm": 8.795289039611816, "learning_rate": 3.877030483794896e-08, "loss": 0.3639, "step": 18911 }, { "epoch": 2.74, "grad_norm": 7.493968486785889, "learning_rate": 3.872667062678958e-08, "loss": 0.3358, "step": 18912 }, { "epoch": 2.74, "grad_norm": 8.59899616241455, "learning_rate": 3.868306049877168e-08, "loss": 0.3582, "step": 18913 }, { "epoch": 2.74, "grad_norm": 9.070853233337402, "learning_rate": 3.8639474454987716e-08, "loss": 0.3883, "step": 18914 }, { "epoch": 2.74, "grad_norm": 8.595428466796875, "learning_rate": 3.8595912496529604e-08, "loss": 0.3529, "step": 18915 }, { "epoch": 2.74, "grad_norm": 9.169119834899902, "learning_rate": 3.8552374624488904e-08, "loss": 0.3981, "step": 18916 }, { "epoch": 2.74, "grad_norm": 9.413784980773926, "learning_rate": 3.850886083995619e-08, "loss": 0.395, "step": 18917 }, { "epoch": 2.74, "grad_norm": 8.024958610534668, "learning_rate": 3.846537114402171e-08, "loss": 0.3347, "step": 18918 }, { "epoch": 2.75, "grad_norm": 10.067275047302246, "learning_rate": 3.842190553777502e-08, "loss": 0.4151, "step": 18919 }, { "epoch": 2.75, "grad_norm": 8.782755851745605, "learning_rate": 3.8378464022305154e-08, "loss": 0.398, "step": 18920 }, { "epoch": 2.75, "grad_norm": 7.740246772766113, "learning_rate": 3.833504659870024e-08, "loss": 0.3304, "step": 18921 }, { "epoch": 2.75, "grad_norm": 8.229703903198242, "learning_rate": 3.8291653268048395e-08, "loss": 0.3243, "step": 18922 }, { "epoch": 2.75, "grad_norm": 8.521001815795898, "learning_rate": 3.8248284031436315e-08, "loss": 0.3225, "step": 18923 }, { "epoch": 2.75, "grad_norm": 8.324161529541016, "learning_rate": 3.820493888995113e-08, "loss": 0.3311, "step": 18924 }, { "epoch": 2.75, "grad_norm": 7.913690567016602, "learning_rate": 3.81616178446782e-08, "loss": 0.402, "step": 18925 }, { "epoch": 2.75, "grad_norm": 7.607529163360596, "learning_rate": 3.811832089670319e-08, "loss": 0.3041, "step": 18926 }, { "epoch": 2.75, "grad_norm": 9.279090881347656, "learning_rate": 3.807504804711081e-08, "loss": 0.3371, "step": 18927 }, { "epoch": 2.75, "grad_norm": 9.009106636047363, "learning_rate": 3.803179929698486e-08, "loss": 0.3658, "step": 18928 }, { "epoch": 2.75, "grad_norm": 8.563117980957031, "learning_rate": 3.798857464740956e-08, "loss": 0.3426, "step": 18929 }, { "epoch": 2.75, "grad_norm": 10.6329984664917, "learning_rate": 3.794537409946707e-08, "loss": 0.3301, "step": 18930 }, { "epoch": 2.75, "grad_norm": 8.154655456542969, "learning_rate": 3.79021976542403e-08, "loss": 0.2813, "step": 18931 }, { "epoch": 2.75, "grad_norm": 10.093374252319336, "learning_rate": 3.785904531281059e-08, "loss": 0.4028, "step": 18932 }, { "epoch": 2.75, "grad_norm": 7.898340225219727, "learning_rate": 3.7815917076259305e-08, "loss": 0.3497, "step": 18933 }, { "epoch": 2.75, "grad_norm": 9.049012184143066, "learning_rate": 3.77728129456667e-08, "loss": 0.3846, "step": 18934 }, { "epoch": 2.75, "grad_norm": 10.262688636779785, "learning_rate": 3.77297329221129e-08, "loss": 0.396, "step": 18935 }, { "epoch": 2.75, "grad_norm": 9.430168151855469, "learning_rate": 3.7686677006677046e-08, "loss": 0.3552, "step": 18936 }, { "epoch": 2.75, "grad_norm": 8.209145545959473, "learning_rate": 3.7643645200437945e-08, "loss": 0.3856, "step": 18937 }, { "epoch": 2.75, "grad_norm": 8.486225128173828, "learning_rate": 3.760063750447362e-08, "loss": 0.3835, "step": 18938 }, { "epoch": 2.75, "grad_norm": 9.86357593536377, "learning_rate": 3.755765391986165e-08, "loss": 0.3355, "step": 18939 }, { "epoch": 2.75, "grad_norm": 7.810798168182373, "learning_rate": 3.751469444767874e-08, "loss": 0.3241, "step": 18940 }, { "epoch": 2.75, "grad_norm": 7.993410587310791, "learning_rate": 3.747175908900146e-08, "loss": 0.315, "step": 18941 }, { "epoch": 2.75, "grad_norm": 9.006178855895996, "learning_rate": 3.7428847844905185e-08, "loss": 0.3481, "step": 18942 }, { "epoch": 2.75, "grad_norm": 8.961567878723145, "learning_rate": 3.7385960716465156e-08, "loss": 0.3514, "step": 18943 }, { "epoch": 2.75, "grad_norm": 8.26041316986084, "learning_rate": 3.7343097704755745e-08, "loss": 0.3632, "step": 18944 }, { "epoch": 2.75, "grad_norm": 7.237438678741455, "learning_rate": 3.730025881085097e-08, "loss": 0.268, "step": 18945 }, { "epoch": 2.75, "grad_norm": 9.368744850158691, "learning_rate": 3.7257444035823985e-08, "loss": 0.3437, "step": 18946 }, { "epoch": 2.75, "grad_norm": 8.884688377380371, "learning_rate": 3.721465338074736e-08, "loss": 0.2976, "step": 18947 }, { "epoch": 2.75, "grad_norm": 9.179973602294922, "learning_rate": 3.7171886846693254e-08, "loss": 0.386, "step": 18948 }, { "epoch": 2.75, "grad_norm": 8.167499542236328, "learning_rate": 3.712914443473303e-08, "loss": 0.3302, "step": 18949 }, { "epoch": 2.75, "grad_norm": 8.514554977416992, "learning_rate": 3.708642614593771e-08, "loss": 0.3628, "step": 18950 }, { "epoch": 2.75, "grad_norm": 8.13989543914795, "learning_rate": 3.704373198137734e-08, "loss": 0.332, "step": 18951 }, { "epoch": 2.75, "grad_norm": 8.557083129882812, "learning_rate": 3.700106194212171e-08, "loss": 0.3791, "step": 18952 }, { "epoch": 2.75, "grad_norm": 8.499031066894531, "learning_rate": 3.6958416029239656e-08, "loss": 0.299, "step": 18953 }, { "epoch": 2.75, "grad_norm": 8.66849422454834, "learning_rate": 3.691579424379976e-08, "loss": 0.3423, "step": 18954 }, { "epoch": 2.75, "grad_norm": 8.024545669555664, "learning_rate": 3.6873196586869826e-08, "loss": 0.2474, "step": 18955 }, { "epoch": 2.75, "grad_norm": 8.418742179870605, "learning_rate": 3.68306230595169e-08, "loss": 0.362, "step": 18956 }, { "epoch": 2.75, "grad_norm": 9.394021987915039, "learning_rate": 3.678807366280789e-08, "loss": 0.3748, "step": 18957 }, { "epoch": 2.75, "grad_norm": 10.208786010742188, "learning_rate": 3.6745548397808526e-08, "loss": 0.394, "step": 18958 }, { "epoch": 2.75, "grad_norm": 8.455233573913574, "learning_rate": 3.6703047265584374e-08, "loss": 0.3959, "step": 18959 }, { "epoch": 2.75, "grad_norm": 8.133028030395508, "learning_rate": 3.6660570267200264e-08, "loss": 0.3754, "step": 18960 }, { "epoch": 2.75, "grad_norm": 9.452381134033203, "learning_rate": 3.6618117403720114e-08, "loss": 0.3133, "step": 18961 }, { "epoch": 2.75, "grad_norm": 9.705499649047852, "learning_rate": 3.657568867620797e-08, "loss": 0.4229, "step": 18962 }, { "epoch": 2.75, "grad_norm": 9.035870552062988, "learning_rate": 3.6533284085726204e-08, "loss": 0.3524, "step": 18963 }, { "epoch": 2.75, "grad_norm": 8.337031364440918, "learning_rate": 3.649090363333784e-08, "loss": 0.3347, "step": 18964 }, { "epoch": 2.75, "grad_norm": 8.285799980163574, "learning_rate": 3.644854732010416e-08, "loss": 0.3635, "step": 18965 }, { "epoch": 2.75, "grad_norm": 9.788169860839844, "learning_rate": 3.640621514708664e-08, "loss": 0.385, "step": 18966 }, { "epoch": 2.75, "grad_norm": 9.077695846557617, "learning_rate": 3.636390711534565e-08, "loss": 0.3175, "step": 18967 }, { "epoch": 2.75, "grad_norm": 8.602171897888184, "learning_rate": 3.6321623225941236e-08, "loss": 0.3741, "step": 18968 }, { "epoch": 2.75, "grad_norm": 8.806039810180664, "learning_rate": 3.627936347993277e-08, "loss": 0.3239, "step": 18969 }, { "epoch": 2.75, "grad_norm": 10.12142562866211, "learning_rate": 3.6237127878378956e-08, "loss": 0.3757, "step": 18970 }, { "epoch": 2.75, "grad_norm": 8.10643196105957, "learning_rate": 3.619491642233796e-08, "loss": 0.3181, "step": 18971 }, { "epoch": 2.75, "grad_norm": 10.451705932617188, "learning_rate": 3.615272911286726e-08, "loss": 0.3706, "step": 18972 }, { "epoch": 2.75, "grad_norm": 8.037920951843262, "learning_rate": 3.6110565951023905e-08, "loss": 0.3461, "step": 18973 }, { "epoch": 2.75, "grad_norm": 8.347838401794434, "learning_rate": 3.606842693786416e-08, "loss": 0.3373, "step": 18974 }, { "epoch": 2.75, "grad_norm": 8.549642562866211, "learning_rate": 3.6026312074443735e-08, "loss": 0.2912, "step": 18975 }, { "epoch": 2.75, "grad_norm": 8.259296417236328, "learning_rate": 3.598422136181789e-08, "loss": 0.3455, "step": 18976 }, { "epoch": 2.75, "grad_norm": 7.90726900100708, "learning_rate": 3.594215480104068e-08, "loss": 0.3234, "step": 18977 }, { "epoch": 2.75, "grad_norm": 9.665396690368652, "learning_rate": 3.59001123931667e-08, "loss": 0.3689, "step": 18978 }, { "epoch": 2.75, "grad_norm": 7.715945243835449, "learning_rate": 3.5858094139248676e-08, "loss": 0.2899, "step": 18979 }, { "epoch": 2.75, "grad_norm": 8.11681842803955, "learning_rate": 3.581610004033963e-08, "loss": 0.3466, "step": 18980 }, { "epoch": 2.75, "grad_norm": 7.79573917388916, "learning_rate": 3.577413009749164e-08, "loss": 0.2882, "step": 18981 }, { "epoch": 2.75, "grad_norm": 7.669633865356445, "learning_rate": 3.573218431175595e-08, "loss": 0.2801, "step": 18982 }, { "epoch": 2.75, "grad_norm": 8.190675735473633, "learning_rate": 3.569026268418385e-08, "loss": 0.3136, "step": 18983 }, { "epoch": 2.75, "grad_norm": 8.216419219970703, "learning_rate": 3.564836521582515e-08, "loss": 0.3762, "step": 18984 }, { "epoch": 2.75, "grad_norm": 8.757761001586914, "learning_rate": 3.560649190772991e-08, "loss": 0.3616, "step": 18985 }, { "epoch": 2.75, "grad_norm": 9.03940486907959, "learning_rate": 3.556464276094684e-08, "loss": 0.3555, "step": 18986 }, { "epoch": 2.75, "grad_norm": 8.461981773376465, "learning_rate": 3.5522817776524884e-08, "loss": 0.3786, "step": 18987 }, { "epoch": 2.76, "grad_norm": 8.311629295349121, "learning_rate": 3.548101695551142e-08, "loss": 0.3236, "step": 18988 }, { "epoch": 2.76, "grad_norm": 8.668416023254395, "learning_rate": 3.5439240298953953e-08, "loss": 0.3694, "step": 18989 }, { "epoch": 2.76, "grad_norm": 8.775679588317871, "learning_rate": 3.539748780789909e-08, "loss": 0.3615, "step": 18990 }, { "epoch": 2.76, "grad_norm": 8.198612213134766, "learning_rate": 3.5355759483392864e-08, "loss": 0.3056, "step": 18991 }, { "epoch": 2.76, "grad_norm": 10.330778121948242, "learning_rate": 3.5314055326480686e-08, "loss": 0.3908, "step": 18992 }, { "epoch": 2.76, "grad_norm": 9.035215377807617, "learning_rate": 3.527237533820737e-08, "loss": 0.3196, "step": 18993 }, { "epoch": 2.76, "grad_norm": 8.250410079956055, "learning_rate": 3.5230719519617205e-08, "loss": 0.3461, "step": 18994 }, { "epoch": 2.76, "grad_norm": 7.356644153594971, "learning_rate": 3.518908787175367e-08, "loss": 0.3129, "step": 18995 }, { "epoch": 2.76, "grad_norm": 9.220083236694336, "learning_rate": 3.5147480395659953e-08, "loss": 0.3691, "step": 18996 }, { "epoch": 2.76, "grad_norm": 8.14448356628418, "learning_rate": 3.5105897092378545e-08, "loss": 0.3386, "step": 18997 }, { "epoch": 2.76, "grad_norm": 9.276750564575195, "learning_rate": 3.506433796295094e-08, "loss": 0.3323, "step": 18998 }, { "epoch": 2.76, "grad_norm": 9.622041702270508, "learning_rate": 3.502280300841853e-08, "loss": 0.3372, "step": 18999 }, { "epoch": 2.76, "grad_norm": 8.44328498840332, "learning_rate": 3.4981292229821824e-08, "loss": 0.3529, "step": 19000 }, { "epoch": 2.76, "grad_norm": 9.700081825256348, "learning_rate": 3.493980562820098e-08, "loss": 0.3534, "step": 19001 }, { "epoch": 2.76, "grad_norm": 9.300802230834961, "learning_rate": 3.489834320459517e-08, "loss": 0.4022, "step": 19002 }, { "epoch": 2.76, "grad_norm": 7.9420599937438965, "learning_rate": 3.485690496004323e-08, "loss": 0.3368, "step": 19003 }, { "epoch": 2.76, "grad_norm": 8.194382667541504, "learning_rate": 3.4815490895583336e-08, "loss": 0.3019, "step": 19004 }, { "epoch": 2.76, "grad_norm": 9.553597450256348, "learning_rate": 3.4774101012252973e-08, "loss": 0.3613, "step": 19005 }, { "epoch": 2.76, "grad_norm": 7.757997512817383, "learning_rate": 3.4732735311089336e-08, "loss": 0.3257, "step": 19006 }, { "epoch": 2.76, "grad_norm": 8.420379638671875, "learning_rate": 3.469139379312858e-08, "loss": 0.349, "step": 19007 }, { "epoch": 2.76, "grad_norm": 9.234686851501465, "learning_rate": 3.465007645940643e-08, "loss": 0.3505, "step": 19008 }, { "epoch": 2.76, "grad_norm": 7.884129047393799, "learning_rate": 3.460878331095818e-08, "loss": 0.293, "step": 19009 }, { "epoch": 2.76, "grad_norm": 8.980992317199707, "learning_rate": 3.456751434881822e-08, "loss": 0.3415, "step": 19010 }, { "epoch": 2.76, "grad_norm": 8.094265937805176, "learning_rate": 3.4526269574020494e-08, "loss": 0.3178, "step": 19011 }, { "epoch": 2.76, "grad_norm": 8.735634803771973, "learning_rate": 3.44850489875983e-08, "loss": 0.3521, "step": 19012 }, { "epoch": 2.76, "grad_norm": 10.349847793579102, "learning_rate": 3.444385259058458e-08, "loss": 0.359, "step": 19013 }, { "epoch": 2.76, "grad_norm": 7.949338912963867, "learning_rate": 3.440268038401117e-08, "loss": 0.3076, "step": 19014 }, { "epoch": 2.76, "grad_norm": 8.767680168151855, "learning_rate": 3.436153236890959e-08, "loss": 0.4093, "step": 19015 }, { "epoch": 2.76, "grad_norm": 10.263811111450195, "learning_rate": 3.432040854631102e-08, "loss": 0.3486, "step": 19016 }, { "epoch": 2.76, "grad_norm": 8.366032600402832, "learning_rate": 3.42793089172454e-08, "loss": 0.2721, "step": 19017 }, { "epoch": 2.76, "grad_norm": 8.848443984985352, "learning_rate": 3.423823348274268e-08, "loss": 0.3593, "step": 19018 }, { "epoch": 2.76, "grad_norm": 8.461665153503418, "learning_rate": 3.419718224383161e-08, "loss": 0.333, "step": 19019 }, { "epoch": 2.76, "grad_norm": 8.158164024353027, "learning_rate": 3.415615520154114e-08, "loss": 0.3605, "step": 19020 }, { "epoch": 2.76, "grad_norm": 8.16097640991211, "learning_rate": 3.4115152356898656e-08, "loss": 0.3634, "step": 19021 }, { "epoch": 2.76, "grad_norm": 8.209394454956055, "learning_rate": 3.4074173710931796e-08, "loss": 0.3076, "step": 19022 }, { "epoch": 2.76, "grad_norm": 10.06689739227295, "learning_rate": 3.403321926466696e-08, "loss": 0.3046, "step": 19023 }, { "epoch": 2.76, "grad_norm": 7.567096710205078, "learning_rate": 3.399228901913032e-08, "loss": 0.3168, "step": 19024 }, { "epoch": 2.76, "grad_norm": 8.738798141479492, "learning_rate": 3.395138297534716e-08, "loss": 0.32, "step": 19025 }, { "epoch": 2.76, "grad_norm": 9.192649841308594, "learning_rate": 3.391050113434257e-08, "loss": 0.3538, "step": 19026 }, { "epoch": 2.76, "grad_norm": 8.378668785095215, "learning_rate": 3.386964349714061e-08, "loss": 0.3343, "step": 19027 }, { "epoch": 2.76, "grad_norm": 7.963348865509033, "learning_rate": 3.3828810064764904e-08, "loss": 0.3236, "step": 19028 }, { "epoch": 2.76, "grad_norm": 7.99887752532959, "learning_rate": 3.378800083823852e-08, "loss": 0.3472, "step": 19029 }, { "epoch": 2.76, "grad_norm": 8.613653182983398, "learning_rate": 3.374721581858386e-08, "loss": 0.3893, "step": 19030 }, { "epoch": 2.76, "grad_norm": 8.747113227844238, "learning_rate": 3.370645500682268e-08, "loss": 0.3711, "step": 19031 }, { "epoch": 2.76, "grad_norm": 8.603537559509277, "learning_rate": 3.366571840397625e-08, "loss": 0.2795, "step": 19032 }, { "epoch": 2.76, "grad_norm": 8.236554145812988, "learning_rate": 3.36250060110651e-08, "loss": 0.3458, "step": 19033 }, { "epoch": 2.76, "grad_norm": 7.756348609924316, "learning_rate": 3.35843178291092e-08, "loss": 0.3451, "step": 19034 }, { "epoch": 2.76, "grad_norm": 8.95836067199707, "learning_rate": 3.354365385912794e-08, "loss": 0.3398, "step": 19035 }, { "epoch": 2.76, "grad_norm": 9.038717269897461, "learning_rate": 3.350301410214018e-08, "loss": 0.3504, "step": 19036 }, { "epoch": 2.76, "grad_norm": 9.261070251464844, "learning_rate": 3.346239855916411e-08, "loss": 0.3172, "step": 19037 }, { "epoch": 2.76, "grad_norm": 8.613945960998535, "learning_rate": 3.34218072312169e-08, "loss": 0.3778, "step": 19038 }, { "epoch": 2.76, "grad_norm": 8.282414436340332, "learning_rate": 3.338124011931609e-08, "loss": 0.3795, "step": 19039 }, { "epoch": 2.76, "grad_norm": 9.624654769897461, "learning_rate": 3.334069722447741e-08, "loss": 0.3381, "step": 19040 }, { "epoch": 2.76, "grad_norm": 7.813334941864014, "learning_rate": 3.3300178547717164e-08, "loss": 0.3209, "step": 19041 }, { "epoch": 2.76, "grad_norm": 12.088831901550293, "learning_rate": 3.325968409004998e-08, "loss": 0.4193, "step": 19042 }, { "epoch": 2.76, "grad_norm": 8.802270889282227, "learning_rate": 3.3219213852490936e-08, "loss": 0.3163, "step": 19043 }, { "epoch": 2.76, "grad_norm": 8.656829833984375, "learning_rate": 3.3178767836053445e-08, "loss": 0.3516, "step": 19044 }, { "epoch": 2.76, "grad_norm": 9.024456024169922, "learning_rate": 3.313834604175103e-08, "loss": 0.3619, "step": 19045 }, { "epoch": 2.76, "grad_norm": 8.283713340759277, "learning_rate": 3.3097948470596325e-08, "loss": 0.3242, "step": 19046 }, { "epoch": 2.76, "grad_norm": 7.563807964324951, "learning_rate": 3.305757512360152e-08, "loss": 0.3108, "step": 19047 }, { "epoch": 2.76, "grad_norm": 8.678027153015137, "learning_rate": 3.301722600177803e-08, "loss": 0.3514, "step": 19048 }, { "epoch": 2.76, "grad_norm": 8.598609924316406, "learning_rate": 3.2976901106136825e-08, "loss": 0.3372, "step": 19049 }, { "epoch": 2.76, "grad_norm": 8.30487060546875, "learning_rate": 3.293660043768798e-08, "loss": 0.3399, "step": 19050 }, { "epoch": 2.76, "grad_norm": 8.63306713104248, "learning_rate": 3.289632399744158e-08, "loss": 0.3622, "step": 19051 }, { "epoch": 2.76, "grad_norm": 8.61801528930664, "learning_rate": 3.2856071786406034e-08, "loss": 0.34, "step": 19052 }, { "epoch": 2.76, "grad_norm": 9.09823226928711, "learning_rate": 3.281584380559044e-08, "loss": 0.325, "step": 19053 }, { "epoch": 2.76, "grad_norm": 8.52145004272461, "learning_rate": 3.27756400560022e-08, "loss": 0.331, "step": 19054 }, { "epoch": 2.76, "grad_norm": 8.34634017944336, "learning_rate": 3.273546053864884e-08, "loss": 0.4042, "step": 19055 }, { "epoch": 2.76, "grad_norm": 8.865983963012695, "learning_rate": 3.2695305254536786e-08, "loss": 0.3745, "step": 19056 }, { "epoch": 2.77, "grad_norm": 10.313982009887695, "learning_rate": 3.265517420467212e-08, "loss": 0.4155, "step": 19057 }, { "epoch": 2.77, "grad_norm": 8.322711944580078, "learning_rate": 3.261506739006048e-08, "loss": 0.3537, "step": 19058 }, { "epoch": 2.77, "grad_norm": 8.189623832702637, "learning_rate": 3.257498481170629e-08, "loss": 0.3677, "step": 19059 }, { "epoch": 2.77, "grad_norm": 7.987309455871582, "learning_rate": 3.2534926470614175e-08, "loss": 0.3845, "step": 19060 }, { "epoch": 2.77, "grad_norm": 8.967120170593262, "learning_rate": 3.2494892367787244e-08, "loss": 0.3271, "step": 19061 }, { "epoch": 2.77, "grad_norm": 8.832353591918945, "learning_rate": 3.245488250422901e-08, "loss": 0.3624, "step": 19062 }, { "epoch": 2.77, "grad_norm": 8.01164722442627, "learning_rate": 3.241489688094146e-08, "loss": 0.3375, "step": 19063 }, { "epoch": 2.77, "grad_norm": 8.459115982055664, "learning_rate": 3.237493549892656e-08, "loss": 0.3381, "step": 19064 }, { "epoch": 2.77, "grad_norm": 9.852018356323242, "learning_rate": 3.2334998359185404e-08, "loss": 0.4778, "step": 19065 }, { "epoch": 2.77, "grad_norm": 7.9047088623046875, "learning_rate": 3.229508546271853e-08, "loss": 0.33, "step": 19066 }, { "epoch": 2.77, "grad_norm": 7.808221340179443, "learning_rate": 3.225519681052602e-08, "loss": 0.3177, "step": 19067 }, { "epoch": 2.77, "grad_norm": 8.328845024108887, "learning_rate": 3.221533240360708e-08, "loss": 0.3317, "step": 19068 }, { "epoch": 2.77, "grad_norm": 7.78274393081665, "learning_rate": 3.2175492242960456e-08, "loss": 0.3243, "step": 19069 }, { "epoch": 2.77, "grad_norm": 8.602523803710938, "learning_rate": 3.213567632958436e-08, "loss": 0.3387, "step": 19070 }, { "epoch": 2.77, "grad_norm": 8.903141021728516, "learning_rate": 3.2095884664476325e-08, "loss": 0.343, "step": 19071 }, { "epoch": 2.77, "grad_norm": 9.238463401794434, "learning_rate": 3.205611724863333e-08, "loss": 0.3548, "step": 19072 }, { "epoch": 2.77, "grad_norm": 9.870211601257324, "learning_rate": 3.2016374083051356e-08, "loss": 0.3963, "step": 19073 }, { "epoch": 2.77, "grad_norm": 8.177495002746582, "learning_rate": 3.1976655168726496e-08, "loss": 0.2955, "step": 19074 }, { "epoch": 2.77, "grad_norm": 8.321372985839844, "learning_rate": 3.1936960506653397e-08, "loss": 0.3062, "step": 19075 }, { "epoch": 2.77, "grad_norm": 10.117388725280762, "learning_rate": 3.1897290097827155e-08, "loss": 0.3648, "step": 19076 }, { "epoch": 2.77, "grad_norm": 9.617535591125488, "learning_rate": 3.1857643943240976e-08, "loss": 0.3755, "step": 19077 }, { "epoch": 2.77, "grad_norm": 7.950743198394775, "learning_rate": 3.181802204388873e-08, "loss": 0.3342, "step": 19078 }, { "epoch": 2.77, "grad_norm": 8.272467613220215, "learning_rate": 3.1778424400762725e-08, "loss": 0.337, "step": 19079 }, { "epoch": 2.77, "grad_norm": 9.492899894714355, "learning_rate": 3.173885101485507e-08, "loss": 0.3344, "step": 19080 }, { "epoch": 2.77, "grad_norm": 9.847418785095215, "learning_rate": 3.1699301887157306e-08, "loss": 0.4077, "step": 19081 }, { "epoch": 2.77, "grad_norm": 8.167718887329102, "learning_rate": 3.165977701866018e-08, "loss": 0.3333, "step": 19082 }, { "epoch": 2.77, "grad_norm": 9.167623519897461, "learning_rate": 3.1620276410353916e-08, "loss": 0.3158, "step": 19083 }, { "epoch": 2.77, "grad_norm": 8.38857650756836, "learning_rate": 3.158080006322816e-08, "loss": 0.3055, "step": 19084 }, { "epoch": 2.77, "grad_norm": 8.973625183105469, "learning_rate": 3.154134797827201e-08, "loss": 0.3122, "step": 19085 }, { "epoch": 2.77, "grad_norm": 9.843082427978516, "learning_rate": 3.1501920156473793e-08, "loss": 0.3772, "step": 19086 }, { "epoch": 2.77, "grad_norm": 10.008855819702148, "learning_rate": 3.1462516598821264e-08, "loss": 0.4368, "step": 19087 }, { "epoch": 2.77, "grad_norm": 10.37741470336914, "learning_rate": 3.142313730630175e-08, "loss": 0.408, "step": 19088 }, { "epoch": 2.77, "grad_norm": 7.770096778869629, "learning_rate": 3.138378227990168e-08, "loss": 0.3315, "step": 19089 }, { "epoch": 2.77, "grad_norm": 8.046571731567383, "learning_rate": 3.134445152060716e-08, "loss": 0.3123, "step": 19090 }, { "epoch": 2.77, "grad_norm": 8.255331993103027, "learning_rate": 3.130514502940351e-08, "loss": 0.3478, "step": 19091 }, { "epoch": 2.77, "grad_norm": 9.803852081298828, "learning_rate": 3.12658628072755e-08, "loss": 0.4266, "step": 19092 }, { "epoch": 2.77, "grad_norm": 8.362629890441895, "learning_rate": 3.1226604855207336e-08, "loss": 0.286, "step": 19093 }, { "epoch": 2.77, "grad_norm": 8.702576637268066, "learning_rate": 3.1187371174182356e-08, "loss": 0.3842, "step": 19094 }, { "epoch": 2.77, "grad_norm": 8.018836975097656, "learning_rate": 3.1148161765183754e-08, "loss": 0.3243, "step": 19095 }, { "epoch": 2.77, "grad_norm": 8.337048530578613, "learning_rate": 3.110897662919354e-08, "loss": 0.3159, "step": 19096 }, { "epoch": 2.77, "grad_norm": 8.356939315795898, "learning_rate": 3.106981576719392e-08, "loss": 0.3259, "step": 19097 }, { "epoch": 2.77, "grad_norm": 8.296486854553223, "learning_rate": 3.103067918016555e-08, "loss": 0.343, "step": 19098 }, { "epoch": 2.77, "grad_norm": 8.431694984436035, "learning_rate": 3.09915668690891e-08, "loss": 0.34, "step": 19099 }, { "epoch": 2.77, "grad_norm": 8.29849910736084, "learning_rate": 3.095247883494456e-08, "loss": 0.2832, "step": 19100 }, { "epoch": 2.77, "grad_norm": 7.964025020599365, "learning_rate": 3.0913415078711036e-08, "loss": 0.3238, "step": 19101 }, { "epoch": 2.77, "grad_norm": 8.510757446289062, "learning_rate": 3.08743756013673e-08, "loss": 0.3902, "step": 19102 }, { "epoch": 2.77, "grad_norm": 7.422729015350342, "learning_rate": 3.0835360403891455e-08, "loss": 0.3114, "step": 19103 }, { "epoch": 2.77, "grad_norm": 7.368251323699951, "learning_rate": 3.0796369487260946e-08, "loss": 0.2858, "step": 19104 }, { "epoch": 2.77, "grad_norm": 7.781215667724609, "learning_rate": 3.075740285245254e-08, "loss": 0.3058, "step": 19105 }, { "epoch": 2.77, "grad_norm": 8.920073509216309, "learning_rate": 3.0718460500442575e-08, "loss": 0.3519, "step": 19106 }, { "epoch": 2.77, "grad_norm": 8.918588638305664, "learning_rate": 3.067954243220683e-08, "loss": 0.3841, "step": 19107 }, { "epoch": 2.77, "grad_norm": 7.67806339263916, "learning_rate": 3.0640648648719846e-08, "loss": 0.3199, "step": 19108 }, { "epoch": 2.77, "grad_norm": 8.565178871154785, "learning_rate": 3.060177915095663e-08, "loss": 0.3518, "step": 19109 }, { "epoch": 2.77, "grad_norm": 8.120755195617676, "learning_rate": 3.0562933939890624e-08, "loss": 0.3905, "step": 19110 }, { "epoch": 2.77, "grad_norm": 8.233447074890137, "learning_rate": 3.0524113016495155e-08, "loss": 0.3357, "step": 19111 }, { "epoch": 2.77, "grad_norm": 8.336507797241211, "learning_rate": 3.048531638174279e-08, "loss": 0.3108, "step": 19112 }, { "epoch": 2.77, "grad_norm": 8.492477416992188, "learning_rate": 3.0446544036605516e-08, "loss": 0.3576, "step": 19113 }, { "epoch": 2.77, "grad_norm": 9.025138854980469, "learning_rate": 3.0407795982054784e-08, "loss": 0.3546, "step": 19114 }, { "epoch": 2.77, "grad_norm": 7.88043212890625, "learning_rate": 3.036907221906115e-08, "loss": 0.3119, "step": 19115 }, { "epoch": 2.77, "grad_norm": 10.156951904296875, "learning_rate": 3.033037274859518e-08, "loss": 0.4374, "step": 19116 }, { "epoch": 2.77, "grad_norm": 8.379820823669434, "learning_rate": 3.029169757162597e-08, "loss": 0.3083, "step": 19117 }, { "epoch": 2.77, "grad_norm": 8.943075180053711, "learning_rate": 3.025304668912276e-08, "loss": 0.3399, "step": 19118 }, { "epoch": 2.77, "grad_norm": 7.661651611328125, "learning_rate": 3.0214420102053775e-08, "loss": 0.2575, "step": 19119 }, { "epoch": 2.77, "grad_norm": 9.466222763061523, "learning_rate": 3.0175817811386786e-08, "loss": 0.4055, "step": 19120 }, { "epoch": 2.77, "grad_norm": 7.989922046661377, "learning_rate": 3.01372398180888e-08, "loss": 0.3202, "step": 19121 }, { "epoch": 2.77, "grad_norm": 8.427659034729004, "learning_rate": 3.009868612312649e-08, "loss": 0.343, "step": 19122 }, { "epoch": 2.77, "grad_norm": 9.17793083190918, "learning_rate": 3.0060156727465647e-08, "loss": 0.3698, "step": 19123 }, { "epoch": 2.77, "grad_norm": 8.230338096618652, "learning_rate": 3.0021651632071596e-08, "loss": 0.3351, "step": 19124 }, { "epoch": 2.77, "grad_norm": 9.070679664611816, "learning_rate": 2.9983170837909025e-08, "loss": 0.3518, "step": 19125 }, { "epoch": 2.78, "grad_norm": 8.170075416564941, "learning_rate": 2.994471434594192e-08, "loss": 0.3229, "step": 19126 }, { "epoch": 2.78, "grad_norm": 8.220508575439453, "learning_rate": 2.9906282157133863e-08, "loss": 0.3225, "step": 19127 }, { "epoch": 2.78, "grad_norm": 9.523490905761719, "learning_rate": 2.986787427244774e-08, "loss": 0.3989, "step": 19128 }, { "epoch": 2.78, "grad_norm": 9.064370155334473, "learning_rate": 2.9829490692845574e-08, "loss": 0.4009, "step": 19129 }, { "epoch": 2.78, "grad_norm": 8.64017105102539, "learning_rate": 2.979113141928935e-08, "loss": 0.3727, "step": 19130 }, { "epoch": 2.78, "grad_norm": 8.758896827697754, "learning_rate": 2.975279645273976e-08, "loss": 0.328, "step": 19131 }, { "epoch": 2.78, "grad_norm": 8.418850898742676, "learning_rate": 2.9714485794157474e-08, "loss": 0.3154, "step": 19132 }, { "epoch": 2.78, "grad_norm": 7.6133904457092285, "learning_rate": 2.967619944450206e-08, "loss": 0.3683, "step": 19133 }, { "epoch": 2.78, "grad_norm": 8.094061851501465, "learning_rate": 2.963793740473308e-08, "loss": 0.3238, "step": 19134 }, { "epoch": 2.78, "grad_norm": 9.042851448059082, "learning_rate": 2.9599699675808774e-08, "loss": 0.4098, "step": 19135 }, { "epoch": 2.78, "grad_norm": 8.692399024963379, "learning_rate": 2.9561486258687153e-08, "loss": 0.3707, "step": 19136 }, { "epoch": 2.78, "grad_norm": 8.211543083190918, "learning_rate": 2.9523297154325887e-08, "loss": 0.3249, "step": 19137 }, { "epoch": 2.78, "grad_norm": 7.625728607177734, "learning_rate": 2.948513236368133e-08, "loss": 0.3263, "step": 19138 }, { "epoch": 2.78, "grad_norm": 8.797258377075195, "learning_rate": 2.944699188771005e-08, "loss": 0.3697, "step": 19139 }, { "epoch": 2.78, "grad_norm": 7.965794563293457, "learning_rate": 2.9408875727367387e-08, "loss": 0.3619, "step": 19140 }, { "epoch": 2.78, "grad_norm": 8.269857406616211, "learning_rate": 2.9370783883608253e-08, "loss": 0.3275, "step": 19141 }, { "epoch": 2.78, "grad_norm": 7.930726528167725, "learning_rate": 2.9332716357386988e-08, "loss": 0.3582, "step": 19142 }, { "epoch": 2.78, "grad_norm": 8.058189392089844, "learning_rate": 2.929467314965739e-08, "loss": 0.3606, "step": 19143 }, { "epoch": 2.78, "grad_norm": 8.747032165527344, "learning_rate": 2.925665426137247e-08, "loss": 0.403, "step": 19144 }, { "epoch": 2.78, "grad_norm": 9.436681747436523, "learning_rate": 2.9218659693484915e-08, "loss": 0.4015, "step": 19145 }, { "epoch": 2.78, "grad_norm": 9.32604694366455, "learning_rate": 2.9180689446946406e-08, "loss": 0.3763, "step": 19146 }, { "epoch": 2.78, "grad_norm": 7.859105587005615, "learning_rate": 2.914274352270829e-08, "loss": 0.3008, "step": 19147 }, { "epoch": 2.78, "grad_norm": 8.397581100463867, "learning_rate": 2.9104821921721256e-08, "loss": 0.2912, "step": 19148 }, { "epoch": 2.78, "grad_norm": 8.956262588500977, "learning_rate": 2.9066924644935542e-08, "loss": 0.4134, "step": 19149 }, { "epoch": 2.78, "grad_norm": 9.037240028381348, "learning_rate": 2.9029051693300168e-08, "loss": 0.3842, "step": 19150 }, { "epoch": 2.78, "grad_norm": 9.016873359680176, "learning_rate": 2.899120306776437e-08, "loss": 0.3807, "step": 19151 }, { "epoch": 2.78, "grad_norm": 7.981494903564453, "learning_rate": 2.8953378769276172e-08, "loss": 0.3583, "step": 19152 }, { "epoch": 2.78, "grad_norm": 10.088484764099121, "learning_rate": 2.891557879878348e-08, "loss": 0.4422, "step": 19153 }, { "epoch": 2.78, "grad_norm": 8.11290168762207, "learning_rate": 2.887780315723298e-08, "loss": 0.3659, "step": 19154 }, { "epoch": 2.78, "grad_norm": 8.58747386932373, "learning_rate": 2.8840051845571146e-08, "loss": 0.3712, "step": 19155 }, { "epoch": 2.78, "grad_norm": 8.215022087097168, "learning_rate": 2.880232486474399e-08, "loss": 0.298, "step": 19156 }, { "epoch": 2.78, "grad_norm": 8.676898956298828, "learning_rate": 2.8764622215696422e-08, "loss": 0.402, "step": 19157 }, { "epoch": 2.78, "grad_norm": 8.963915824890137, "learning_rate": 2.8726943899373245e-08, "loss": 0.3706, "step": 19158 }, { "epoch": 2.78, "grad_norm": 8.012594223022461, "learning_rate": 2.868928991671826e-08, "loss": 0.3171, "step": 19159 }, { "epoch": 2.78, "grad_norm": 9.512140274047852, "learning_rate": 2.8651660268674938e-08, "loss": 0.3427, "step": 19160 }, { "epoch": 2.78, "grad_norm": 10.56797981262207, "learning_rate": 2.8614054956186074e-08, "loss": 0.3788, "step": 19161 }, { "epoch": 2.78, "grad_norm": 8.492774963378906, "learning_rate": 2.8576473980193583e-08, "loss": 0.3518, "step": 19162 }, { "epoch": 2.78, "grad_norm": 8.556243896484375, "learning_rate": 2.853891734163927e-08, "loss": 0.3505, "step": 19163 }, { "epoch": 2.78, "grad_norm": 9.532258987426758, "learning_rate": 2.850138504146382e-08, "loss": 0.3731, "step": 19164 }, { "epoch": 2.78, "grad_norm": 8.724767684936523, "learning_rate": 2.846387708060771e-08, "loss": 0.3488, "step": 19165 }, { "epoch": 2.78, "grad_norm": 8.898292541503906, "learning_rate": 2.8426393460010523e-08, "loss": 0.3571, "step": 19166 }, { "epoch": 2.78, "grad_norm": 9.059943199157715, "learning_rate": 2.83889341806115e-08, "loss": 0.3636, "step": 19167 }, { "epoch": 2.78, "grad_norm": 8.557453155517578, "learning_rate": 2.8351499243348897e-08, "loss": 0.3886, "step": 19168 }, { "epoch": 2.78, "grad_norm": 9.544122695922852, "learning_rate": 2.831408864916085e-08, "loss": 0.4272, "step": 19169 }, { "epoch": 2.78, "grad_norm": 9.433951377868652, "learning_rate": 2.8276702398984497e-08, "loss": 0.3866, "step": 19170 }, { "epoch": 2.78, "grad_norm": 8.13436508178711, "learning_rate": 2.8239340493756315e-08, "loss": 0.3279, "step": 19171 }, { "epoch": 2.78, "grad_norm": 8.194904327392578, "learning_rate": 2.820200293441277e-08, "loss": 0.358, "step": 19172 }, { "epoch": 2.78, "grad_norm": 9.69486141204834, "learning_rate": 2.816468972188879e-08, "loss": 0.3649, "step": 19173 }, { "epoch": 2.78, "grad_norm": 8.104801177978516, "learning_rate": 2.812740085711962e-08, "loss": 0.3428, "step": 19174 }, { "epoch": 2.78, "grad_norm": 8.5992431640625, "learning_rate": 2.8090136341039184e-08, "loss": 0.3728, "step": 19175 }, { "epoch": 2.78, "grad_norm": 8.10311222076416, "learning_rate": 2.8052896174581176e-08, "loss": 0.3112, "step": 19176 }, { "epoch": 2.78, "grad_norm": 8.747377395629883, "learning_rate": 2.801568035867874e-08, "loss": 0.373, "step": 19177 }, { "epoch": 2.78, "grad_norm": 8.193693161010742, "learning_rate": 2.7978488894264018e-08, "loss": 0.3649, "step": 19178 }, { "epoch": 2.78, "grad_norm": 9.353224754333496, "learning_rate": 2.7941321782268823e-08, "loss": 0.3573, "step": 19179 }, { "epoch": 2.78, "grad_norm": 10.127652168273926, "learning_rate": 2.790417902362452e-08, "loss": 0.4045, "step": 19180 }, { "epoch": 2.78, "grad_norm": 8.383734703063965, "learning_rate": 2.7867060619261473e-08, "loss": 0.3308, "step": 19181 }, { "epoch": 2.78, "grad_norm": 8.238438606262207, "learning_rate": 2.7829966570109608e-08, "loss": 0.3255, "step": 19182 }, { "epoch": 2.78, "grad_norm": 9.370409965515137, "learning_rate": 2.7792896877098404e-08, "loss": 0.3599, "step": 19183 }, { "epoch": 2.78, "grad_norm": 8.400914192199707, "learning_rate": 2.775585154115656e-08, "loss": 0.3763, "step": 19184 }, { "epoch": 2.78, "grad_norm": 8.05716323852539, "learning_rate": 2.7718830563211894e-08, "loss": 0.3266, "step": 19185 }, { "epoch": 2.78, "grad_norm": 8.12869644165039, "learning_rate": 2.7681833944192433e-08, "loss": 0.3757, "step": 19186 }, { "epoch": 2.78, "grad_norm": 9.768083572387695, "learning_rate": 2.7644861685024445e-08, "loss": 0.3056, "step": 19187 }, { "epoch": 2.78, "grad_norm": 9.874356269836426, "learning_rate": 2.760791378663474e-08, "loss": 0.4322, "step": 19188 }, { "epoch": 2.78, "grad_norm": 9.803616523742676, "learning_rate": 2.757099024994891e-08, "loss": 0.3502, "step": 19189 }, { "epoch": 2.78, "grad_norm": 9.712199211120605, "learning_rate": 2.753409107589155e-08, "loss": 0.3229, "step": 19190 }, { "epoch": 2.78, "grad_norm": 8.012861251831055, "learning_rate": 2.7497216265387814e-08, "loss": 0.3249, "step": 19191 }, { "epoch": 2.78, "grad_norm": 8.190552711486816, "learning_rate": 2.7460365819360844e-08, "loss": 0.3737, "step": 19192 }, { "epoch": 2.78, "grad_norm": 9.025200843811035, "learning_rate": 2.7423539738734458e-08, "loss": 0.335, "step": 19193 }, { "epoch": 2.78, "grad_norm": 8.639331817626953, "learning_rate": 2.738673802443081e-08, "loss": 0.3479, "step": 19194 }, { "epoch": 2.79, "grad_norm": 9.039106369018555, "learning_rate": 2.7349960677372275e-08, "loss": 0.3738, "step": 19195 }, { "epoch": 2.79, "grad_norm": 9.947113037109375, "learning_rate": 2.7313207698480002e-08, "loss": 0.4352, "step": 19196 }, { "epoch": 2.79, "grad_norm": 7.474049091339111, "learning_rate": 2.727647908867481e-08, "loss": 0.3048, "step": 19197 }, { "epoch": 2.79, "grad_norm": 8.77159595489502, "learning_rate": 2.7239774848876963e-08, "loss": 0.3105, "step": 19198 }, { "epoch": 2.79, "grad_norm": 10.153377532958984, "learning_rate": 2.7203094980006058e-08, "loss": 0.394, "step": 19199 }, { "epoch": 2.79, "grad_norm": 8.177502632141113, "learning_rate": 2.7166439482980917e-08, "loss": 0.2992, "step": 19200 }, { "epoch": 2.79, "grad_norm": 8.595165252685547, "learning_rate": 2.712980835871992e-08, "loss": 0.3681, "step": 19201 }, { "epoch": 2.79, "grad_norm": 9.380468368530273, "learning_rate": 2.709320160814088e-08, "loss": 0.3648, "step": 19202 }, { "epoch": 2.79, "grad_norm": 8.690176010131836, "learning_rate": 2.7056619232160848e-08, "loss": 0.3505, "step": 19203 }, { "epoch": 2.79, "grad_norm": 9.08702278137207, "learning_rate": 2.7020061231696422e-08, "loss": 0.369, "step": 19204 }, { "epoch": 2.79, "grad_norm": 8.103711128234863, "learning_rate": 2.6983527607663425e-08, "loss": 0.305, "step": 19205 }, { "epoch": 2.79, "grad_norm": 10.372101783752441, "learning_rate": 2.6947018360977016e-08, "loss": 0.3764, "step": 19206 }, { "epoch": 2.79, "grad_norm": 9.495779991149902, "learning_rate": 2.6910533492552235e-08, "loss": 0.3808, "step": 19207 }, { "epoch": 2.79, "grad_norm": 8.37137508392334, "learning_rate": 2.6874073003302687e-08, "loss": 0.2906, "step": 19208 }, { "epoch": 2.79, "grad_norm": 8.48581314086914, "learning_rate": 2.68376368941422e-08, "loss": 0.3926, "step": 19209 }, { "epoch": 2.79, "grad_norm": 9.58115291595459, "learning_rate": 2.6801225165983486e-08, "loss": 0.4271, "step": 19210 }, { "epoch": 2.79, "grad_norm": 7.776295185089111, "learning_rate": 2.6764837819738815e-08, "loss": 0.324, "step": 19211 }, { "epoch": 2.79, "grad_norm": 8.103081703186035, "learning_rate": 2.6728474856319795e-08, "loss": 0.3487, "step": 19212 }, { "epoch": 2.79, "grad_norm": 7.979985237121582, "learning_rate": 2.6692136276637465e-08, "loss": 0.313, "step": 19213 }, { "epoch": 2.79, "grad_norm": 7.592923164367676, "learning_rate": 2.665582208160211e-08, "loss": 0.319, "step": 19214 }, { "epoch": 2.79, "grad_norm": 8.650001525878906, "learning_rate": 2.6619532272123544e-08, "loss": 0.3868, "step": 19215 }, { "epoch": 2.79, "grad_norm": 9.090112686157227, "learning_rate": 2.6583266849111274e-08, "loss": 0.3093, "step": 19216 }, { "epoch": 2.79, "grad_norm": 9.147971153259277, "learning_rate": 2.6547025813473456e-08, "loss": 0.3326, "step": 19217 }, { "epoch": 2.79, "grad_norm": 10.095714569091797, "learning_rate": 2.6510809166118252e-08, "loss": 0.4032, "step": 19218 }, { "epoch": 2.79, "grad_norm": 9.283598899841309, "learning_rate": 2.647461690795294e-08, "loss": 0.3717, "step": 19219 }, { "epoch": 2.79, "grad_norm": 8.885029792785645, "learning_rate": 2.6438449039884236e-08, "loss": 0.3265, "step": 19220 }, { "epoch": 2.79, "grad_norm": 8.02608871459961, "learning_rate": 2.6402305562818415e-08, "loss": 0.3701, "step": 19221 }, { "epoch": 2.79, "grad_norm": 10.093194007873535, "learning_rate": 2.6366186477660867e-08, "loss": 0.3732, "step": 19222 }, { "epoch": 2.79, "grad_norm": 8.287211418151855, "learning_rate": 2.6330091785316533e-08, "loss": 0.3366, "step": 19223 }, { "epoch": 2.79, "grad_norm": 9.30458927154541, "learning_rate": 2.6294021486689687e-08, "loss": 0.315, "step": 19224 }, { "epoch": 2.79, "grad_norm": 7.9389424324035645, "learning_rate": 2.6257975582684054e-08, "loss": 0.3462, "step": 19225 }, { "epoch": 2.79, "grad_norm": 9.357623100280762, "learning_rate": 2.622195407420269e-08, "loss": 0.4012, "step": 19226 }, { "epoch": 2.79, "grad_norm": 8.109392166137695, "learning_rate": 2.618595696214798e-08, "loss": 0.3256, "step": 19227 }, { "epoch": 2.79, "grad_norm": 9.362627983093262, "learning_rate": 2.6149984247421984e-08, "loss": 0.3814, "step": 19228 }, { "epoch": 2.79, "grad_norm": 8.078896522521973, "learning_rate": 2.6114035930925538e-08, "loss": 0.3251, "step": 19229 }, { "epoch": 2.79, "grad_norm": 10.55624771118164, "learning_rate": 2.6078112013559806e-08, "loss": 0.3768, "step": 19230 }, { "epoch": 2.79, "grad_norm": 8.361678123474121, "learning_rate": 2.6042212496224403e-08, "loss": 0.4036, "step": 19231 }, { "epoch": 2.79, "grad_norm": 7.780178546905518, "learning_rate": 2.6006337379818722e-08, "loss": 0.2742, "step": 19232 }, { "epoch": 2.79, "grad_norm": 8.510382652282715, "learning_rate": 2.597048666524182e-08, "loss": 0.3328, "step": 19233 }, { "epoch": 2.79, "grad_norm": 8.341731071472168, "learning_rate": 2.593466035339176e-08, "loss": 0.34, "step": 19234 }, { "epoch": 2.79, "grad_norm": 8.521385192871094, "learning_rate": 2.5898858445166037e-08, "loss": 0.3957, "step": 19235 }, { "epoch": 2.79, "grad_norm": 8.927745819091797, "learning_rate": 2.5863080941461613e-08, "loss": 0.3393, "step": 19236 }, { "epoch": 2.79, "grad_norm": 9.09577751159668, "learning_rate": 2.5827327843174985e-08, "loss": 0.3467, "step": 19237 }, { "epoch": 2.79, "grad_norm": 8.618836402893066, "learning_rate": 2.5791599151201772e-08, "loss": 0.3879, "step": 19238 }, { "epoch": 2.79, "grad_norm": 8.63719367980957, "learning_rate": 2.5755894866437144e-08, "loss": 0.4056, "step": 19239 }, { "epoch": 2.79, "grad_norm": 8.3496732711792, "learning_rate": 2.5720214989775502e-08, "loss": 0.3317, "step": 19240 }, { "epoch": 2.79, "grad_norm": 8.311230659484863, "learning_rate": 2.5684559522110903e-08, "loss": 0.3335, "step": 19241 }, { "epoch": 2.79, "grad_norm": 9.185420036315918, "learning_rate": 2.564892846433664e-08, "loss": 0.3722, "step": 19242 }, { "epoch": 2.79, "grad_norm": 7.776011943817139, "learning_rate": 2.5613321817345212e-08, "loss": 0.3403, "step": 19243 }, { "epoch": 2.79, "grad_norm": 8.83499526977539, "learning_rate": 2.5577739582028802e-08, "loss": 0.3811, "step": 19244 }, { "epoch": 2.79, "grad_norm": 9.830589294433594, "learning_rate": 2.5542181759278914e-08, "loss": 0.4209, "step": 19245 }, { "epoch": 2.79, "grad_norm": 8.516829490661621, "learning_rate": 2.5506648349986283e-08, "loss": 0.3224, "step": 19246 }, { "epoch": 2.79, "grad_norm": 8.031009674072266, "learning_rate": 2.5471139355041306e-08, "loss": 0.3686, "step": 19247 }, { "epoch": 2.79, "grad_norm": 7.175673961639404, "learning_rate": 2.5435654775333272e-08, "loss": 0.3651, "step": 19248 }, { "epoch": 2.79, "grad_norm": 8.365351676940918, "learning_rate": 2.540019461175158e-08, "loss": 0.3786, "step": 19249 }, { "epoch": 2.79, "grad_norm": 9.45430850982666, "learning_rate": 2.53647588651843e-08, "loss": 0.3498, "step": 19250 }, { "epoch": 2.79, "grad_norm": 7.163179874420166, "learning_rate": 2.532934753651961e-08, "loss": 0.3151, "step": 19251 }, { "epoch": 2.79, "grad_norm": 9.099014282226562, "learning_rate": 2.529396062664424e-08, "loss": 0.3166, "step": 19252 }, { "epoch": 2.79, "grad_norm": 8.8040189743042, "learning_rate": 2.5258598136445042e-08, "loss": 0.3337, "step": 19253 }, { "epoch": 2.79, "grad_norm": 9.726505279541016, "learning_rate": 2.522326006680786e-08, "loss": 0.2971, "step": 19254 }, { "epoch": 2.79, "grad_norm": 8.09591293334961, "learning_rate": 2.51879464186181e-08, "loss": 0.3351, "step": 19255 }, { "epoch": 2.79, "grad_norm": 9.654163360595703, "learning_rate": 2.5152657192760383e-08, "loss": 0.3861, "step": 19256 }, { "epoch": 2.79, "grad_norm": 8.51672649383545, "learning_rate": 2.5117392390118896e-08, "loss": 0.3707, "step": 19257 }, { "epoch": 2.79, "grad_norm": 8.542154312133789, "learning_rate": 2.508215201157704e-08, "loss": 0.336, "step": 19258 }, { "epoch": 2.79, "grad_norm": 8.1996431350708, "learning_rate": 2.5046936058017888e-08, "loss": 0.3355, "step": 19259 }, { "epoch": 2.79, "grad_norm": 8.272236824035645, "learning_rate": 2.501174453032362e-08, "loss": 0.3376, "step": 19260 }, { "epoch": 2.79, "grad_norm": 8.385780334472656, "learning_rate": 2.4976577429375976e-08, "loss": 0.3097, "step": 19261 }, { "epoch": 2.79, "grad_norm": 7.169750213623047, "learning_rate": 2.49414347560557e-08, "loss": 0.3056, "step": 19262 }, { "epoch": 2.79, "grad_norm": 8.794862747192383, "learning_rate": 2.4906316511243752e-08, "loss": 0.3613, "step": 19263 }, { "epoch": 2.8, "grad_norm": 7.869973659515381, "learning_rate": 2.4871222695819427e-08, "loss": 0.3249, "step": 19264 }, { "epoch": 2.8, "grad_norm": 8.599052429199219, "learning_rate": 2.4836153310662466e-08, "loss": 0.3556, "step": 19265 }, { "epoch": 2.8, "grad_norm": 9.192248344421387, "learning_rate": 2.4801108356651057e-08, "loss": 0.3752, "step": 19266 }, { "epoch": 2.8, "grad_norm": 8.18850326538086, "learning_rate": 2.4766087834663162e-08, "loss": 0.3365, "step": 19267 }, { "epoch": 2.8, "grad_norm": 10.576663970947266, "learning_rate": 2.473109174557664e-08, "loss": 0.351, "step": 19268 }, { "epoch": 2.8, "grad_norm": 9.395116806030273, "learning_rate": 2.4696120090267782e-08, "loss": 0.3578, "step": 19269 }, { "epoch": 2.8, "grad_norm": 8.605846405029297, "learning_rate": 2.4661172869613112e-08, "loss": 0.3181, "step": 19270 }, { "epoch": 2.8, "grad_norm": 8.691205978393555, "learning_rate": 2.4626250084487932e-08, "loss": 0.3052, "step": 19271 }, { "epoch": 2.8, "grad_norm": 9.955693244934082, "learning_rate": 2.4591351735767207e-08, "loss": 0.4483, "step": 19272 }, { "epoch": 2.8, "grad_norm": 8.279886245727539, "learning_rate": 2.455647782432535e-08, "loss": 0.3796, "step": 19273 }, { "epoch": 2.8, "grad_norm": 9.064759254455566, "learning_rate": 2.4521628351035884e-08, "loss": 0.3502, "step": 19274 }, { "epoch": 2.8, "grad_norm": 8.378543853759766, "learning_rate": 2.4486803316772107e-08, "loss": 0.3516, "step": 19275 }, { "epoch": 2.8, "grad_norm": 9.105199813842773, "learning_rate": 2.445200272240644e-08, "loss": 0.349, "step": 19276 }, { "epoch": 2.8, "grad_norm": 8.164054870605469, "learning_rate": 2.4417226568810734e-08, "loss": 0.3151, "step": 19277 }, { "epoch": 2.8, "grad_norm": 8.614201545715332, "learning_rate": 2.4382474856856295e-08, "loss": 0.3129, "step": 19278 }, { "epoch": 2.8, "grad_norm": 8.064664840698242, "learning_rate": 2.4347747587413646e-08, "loss": 0.347, "step": 19279 }, { "epoch": 2.8, "grad_norm": 8.482763290405273, "learning_rate": 2.4313044761353098e-08, "loss": 0.3831, "step": 19280 }, { "epoch": 2.8, "grad_norm": 9.005194664001465, "learning_rate": 2.4278366379543726e-08, "loss": 0.3972, "step": 19281 }, { "epoch": 2.8, "grad_norm": 7.67824125289917, "learning_rate": 2.4243712442854612e-08, "loss": 0.2859, "step": 19282 }, { "epoch": 2.8, "grad_norm": 9.641937255859375, "learning_rate": 2.4209082952153625e-08, "loss": 0.3999, "step": 19283 }, { "epoch": 2.8, "grad_norm": 8.307737350463867, "learning_rate": 2.4174477908308844e-08, "loss": 0.3434, "step": 19284 }, { "epoch": 2.8, "grad_norm": 8.030646324157715, "learning_rate": 2.4139897312186797e-08, "loss": 0.3073, "step": 19285 }, { "epoch": 2.8, "grad_norm": 9.711908340454102, "learning_rate": 2.4105341164654125e-08, "loss": 0.3813, "step": 19286 }, { "epoch": 2.8, "grad_norm": 7.622352123260498, "learning_rate": 2.407080946657636e-08, "loss": 0.3077, "step": 19287 }, { "epoch": 2.8, "grad_norm": 8.48929500579834, "learning_rate": 2.4036302218818804e-08, "loss": 0.3277, "step": 19288 }, { "epoch": 2.8, "grad_norm": 8.413678169250488, "learning_rate": 2.4001819422245885e-08, "loss": 0.28, "step": 19289 }, { "epoch": 2.8, "grad_norm": 9.336977005004883, "learning_rate": 2.3967361077721683e-08, "loss": 0.3656, "step": 19290 }, { "epoch": 2.8, "grad_norm": 8.906652450561523, "learning_rate": 2.3932927186109176e-08, "loss": 0.3902, "step": 19291 }, { "epoch": 2.8, "grad_norm": 10.45635986328125, "learning_rate": 2.389851774827134e-08, "loss": 0.3707, "step": 19292 }, { "epoch": 2.8, "grad_norm": 9.291383743286133, "learning_rate": 2.3864132765070155e-08, "loss": 0.3165, "step": 19293 }, { "epoch": 2.8, "grad_norm": 8.422245979309082, "learning_rate": 2.382977223736704e-08, "loss": 0.3414, "step": 19294 }, { "epoch": 2.8, "grad_norm": 8.238997459411621, "learning_rate": 2.3795436166022864e-08, "loss": 0.3135, "step": 19295 }, { "epoch": 2.8, "grad_norm": 9.020101547241211, "learning_rate": 2.3761124551897826e-08, "loss": 0.3862, "step": 19296 }, { "epoch": 2.8, "grad_norm": 8.527213096618652, "learning_rate": 2.3726837395851685e-08, "loss": 0.3337, "step": 19297 }, { "epoch": 2.8, "grad_norm": 8.09487533569336, "learning_rate": 2.3692574698743305e-08, "loss": 0.3429, "step": 19298 }, { "epoch": 2.8, "grad_norm": 7.80986213684082, "learning_rate": 2.3658336461431118e-08, "loss": 0.3507, "step": 19299 }, { "epoch": 2.8, "grad_norm": 8.072737693786621, "learning_rate": 2.3624122684772874e-08, "loss": 0.3447, "step": 19300 }, { "epoch": 2.8, "grad_norm": 8.402990341186523, "learning_rate": 2.3589933369626004e-08, "loss": 0.3467, "step": 19301 }, { "epoch": 2.8, "grad_norm": 8.472738265991211, "learning_rate": 2.35557685168466e-08, "loss": 0.3243, "step": 19302 }, { "epoch": 2.8, "grad_norm": 8.744635581970215, "learning_rate": 2.3521628127291083e-08, "loss": 0.3621, "step": 19303 }, { "epoch": 2.8, "grad_norm": 8.408950805664062, "learning_rate": 2.348751220181433e-08, "loss": 0.3064, "step": 19304 }, { "epoch": 2.8, "grad_norm": 9.683248519897461, "learning_rate": 2.3453420741271546e-08, "loss": 0.3962, "step": 19305 }, { "epoch": 2.8, "grad_norm": 8.434233665466309, "learning_rate": 2.341935374651638e-08, "loss": 0.374, "step": 19306 }, { "epoch": 2.8, "grad_norm": 9.014140129089355, "learning_rate": 2.3385311218402592e-08, "loss": 0.376, "step": 19307 }, { "epoch": 2.8, "grad_norm": 8.250530242919922, "learning_rate": 2.335129315778306e-08, "loss": 0.337, "step": 19308 }, { "epoch": 2.8, "grad_norm": 10.1002197265625, "learning_rate": 2.3317299565509986e-08, "loss": 0.3994, "step": 19309 }, { "epoch": 2.8, "grad_norm": 7.733196258544922, "learning_rate": 2.328333044243491e-08, "loss": 0.3681, "step": 19310 }, { "epoch": 2.8, "grad_norm": 8.2207612991333, "learning_rate": 2.324938578940905e-08, "loss": 0.2917, "step": 19311 }, { "epoch": 2.8, "grad_norm": 8.39159107208252, "learning_rate": 2.3215465607282825e-08, "loss": 0.3445, "step": 19312 }, { "epoch": 2.8, "grad_norm": 8.442858695983887, "learning_rate": 2.3181569896906006e-08, "loss": 0.3552, "step": 19313 }, { "epoch": 2.8, "grad_norm": 8.11488151550293, "learning_rate": 2.314769865912769e-08, "loss": 0.3307, "step": 19314 }, { "epoch": 2.8, "grad_norm": 8.468586921691895, "learning_rate": 2.3113851894796642e-08, "loss": 0.3228, "step": 19315 }, { "epoch": 2.8, "grad_norm": 9.07264518737793, "learning_rate": 2.3080029604760744e-08, "loss": 0.319, "step": 19316 }, { "epoch": 2.8, "grad_norm": 7.601559638977051, "learning_rate": 2.304623178986742e-08, "loss": 0.3403, "step": 19317 }, { "epoch": 2.8, "grad_norm": 7.605027675628662, "learning_rate": 2.3012458450963334e-08, "loss": 0.2768, "step": 19318 }, { "epoch": 2.8, "grad_norm": 9.082650184631348, "learning_rate": 2.2978709588894584e-08, "loss": 0.3048, "step": 19319 }, { "epoch": 2.8, "grad_norm": 9.82041072845459, "learning_rate": 2.2944985204506827e-08, "loss": 0.4101, "step": 19320 }, { "epoch": 2.8, "grad_norm": 8.545003890991211, "learning_rate": 2.2911285298644945e-08, "loss": 0.3836, "step": 19321 }, { "epoch": 2.8, "grad_norm": 7.805996894836426, "learning_rate": 2.287760987215337e-08, "loss": 0.3115, "step": 19322 }, { "epoch": 2.8, "grad_norm": 8.739312171936035, "learning_rate": 2.2843958925875316e-08, "loss": 0.4062, "step": 19323 }, { "epoch": 2.8, "grad_norm": 9.016117095947266, "learning_rate": 2.2810332460654448e-08, "loss": 0.3534, "step": 19324 }, { "epoch": 2.8, "grad_norm": 10.081087112426758, "learning_rate": 2.277673047733264e-08, "loss": 0.441, "step": 19325 }, { "epoch": 2.8, "grad_norm": 9.280946731567383, "learning_rate": 2.2743152976752333e-08, "loss": 0.3385, "step": 19326 }, { "epoch": 2.8, "grad_norm": 8.311786651611328, "learning_rate": 2.27095999597543e-08, "loss": 0.3369, "step": 19327 }, { "epoch": 2.8, "grad_norm": 8.597519874572754, "learning_rate": 2.2676071427179312e-08, "loss": 0.3272, "step": 19328 }, { "epoch": 2.8, "grad_norm": 10.068653106689453, "learning_rate": 2.264256737986736e-08, "loss": 0.4493, "step": 19329 }, { "epoch": 2.8, "grad_norm": 8.719643592834473, "learning_rate": 2.260908781865778e-08, "loss": 0.3113, "step": 19330 }, { "epoch": 2.8, "grad_norm": 8.967238426208496, "learning_rate": 2.257563274438945e-08, "loss": 0.3118, "step": 19331 }, { "epoch": 2.8, "grad_norm": 9.784269332885742, "learning_rate": 2.2542202157900592e-08, "loss": 0.364, "step": 19332 }, { "epoch": 2.81, "grad_norm": 10.714786529541016, "learning_rate": 2.2508796060028535e-08, "loss": 0.454, "step": 19333 }, { "epoch": 2.81, "grad_norm": 8.009450912475586, "learning_rate": 2.2475414451610387e-08, "loss": 0.3432, "step": 19334 }, { "epoch": 2.81, "grad_norm": 9.953174591064453, "learning_rate": 2.2442057333482478e-08, "loss": 0.3791, "step": 19335 }, { "epoch": 2.81, "grad_norm": 10.074737548828125, "learning_rate": 2.2408724706480476e-08, "loss": 0.3541, "step": 19336 }, { "epoch": 2.81, "grad_norm": 8.23339557647705, "learning_rate": 2.2375416571439264e-08, "loss": 0.3365, "step": 19337 }, { "epoch": 2.81, "grad_norm": 8.347018241882324, "learning_rate": 2.2342132929193735e-08, "loss": 0.3089, "step": 19338 }, { "epoch": 2.81, "grad_norm": 7.571383953094482, "learning_rate": 2.230887378057733e-08, "loss": 0.3093, "step": 19339 }, { "epoch": 2.81, "grad_norm": 8.65953540802002, "learning_rate": 2.227563912642372e-08, "loss": 0.359, "step": 19340 }, { "epoch": 2.81, "grad_norm": 8.390602111816406, "learning_rate": 2.224242896756512e-08, "loss": 0.3758, "step": 19341 }, { "epoch": 2.81, "grad_norm": 10.873505592346191, "learning_rate": 2.220924330483398e-08, "loss": 0.5003, "step": 19342 }, { "epoch": 2.81, "grad_norm": 8.679036140441895, "learning_rate": 2.2176082139061415e-08, "loss": 0.3719, "step": 19343 }, { "epoch": 2.81, "grad_norm": 9.026529312133789, "learning_rate": 2.214294547107831e-08, "loss": 0.3464, "step": 19344 }, { "epoch": 2.81, "grad_norm": 8.553215026855469, "learning_rate": 2.2109833301714897e-08, "loss": 0.34, "step": 19345 }, { "epoch": 2.81, "grad_norm": 8.546167373657227, "learning_rate": 2.207674563180062e-08, "loss": 0.3533, "step": 19346 }, { "epoch": 2.81, "grad_norm": 9.074254035949707, "learning_rate": 2.2043682462164592e-08, "loss": 0.4137, "step": 19347 }, { "epoch": 2.81, "grad_norm": 9.175774574279785, "learning_rate": 2.2010643793635153e-08, "loss": 0.3577, "step": 19348 }, { "epoch": 2.81, "grad_norm": 8.410090446472168, "learning_rate": 2.1977629627039864e-08, "loss": 0.3391, "step": 19349 }, { "epoch": 2.81, "grad_norm": 8.527642250061035, "learning_rate": 2.1944639963205948e-08, "loss": 0.294, "step": 19350 }, { "epoch": 2.81, "grad_norm": 8.817873001098633, "learning_rate": 2.1911674802959967e-08, "loss": 0.3589, "step": 19351 }, { "epoch": 2.81, "grad_norm": 9.534770965576172, "learning_rate": 2.187873414712771e-08, "loss": 0.3368, "step": 19352 }, { "epoch": 2.81, "grad_norm": 10.123417854309082, "learning_rate": 2.18458179965344e-08, "loss": 0.4263, "step": 19353 }, { "epoch": 2.81, "grad_norm": 8.848573684692383, "learning_rate": 2.1812926352004934e-08, "loss": 0.3642, "step": 19354 }, { "epoch": 2.81, "grad_norm": 7.7000322341918945, "learning_rate": 2.17800592143631e-08, "loss": 0.3021, "step": 19355 }, { "epoch": 2.81, "grad_norm": 9.238471984863281, "learning_rate": 2.174721658443257e-08, "loss": 0.3376, "step": 19356 }, { "epoch": 2.81, "grad_norm": 8.521547317504883, "learning_rate": 2.171439846303602e-08, "loss": 0.2842, "step": 19357 }, { "epoch": 2.81, "grad_norm": 9.107976913452148, "learning_rate": 2.168160485099546e-08, "loss": 0.4108, "step": 19358 }, { "epoch": 2.81, "grad_norm": 10.474651336669922, "learning_rate": 2.16488357491329e-08, "loss": 0.4668, "step": 19359 }, { "epoch": 2.81, "grad_norm": 10.16220760345459, "learning_rate": 2.1616091158269012e-08, "loss": 0.4128, "step": 19360 }, { "epoch": 2.81, "grad_norm": 8.738982200622559, "learning_rate": 2.1583371079224366e-08, "loss": 0.3396, "step": 19361 }, { "epoch": 2.81, "grad_norm": 9.642441749572754, "learning_rate": 2.1550675512818417e-08, "loss": 0.3492, "step": 19362 }, { "epoch": 2.81, "grad_norm": 9.080771446228027, "learning_rate": 2.151800445987062e-08, "loss": 0.3219, "step": 19363 }, { "epoch": 2.81, "grad_norm": 9.946995735168457, "learning_rate": 2.1485357921199322e-08, "loss": 0.415, "step": 19364 }, { "epoch": 2.81, "grad_norm": 9.498408317565918, "learning_rate": 2.145273589762253e-08, "loss": 0.3152, "step": 19365 }, { "epoch": 2.81, "grad_norm": 7.902754306793213, "learning_rate": 2.1420138389957376e-08, "loss": 0.3489, "step": 19366 }, { "epoch": 2.81, "grad_norm": 8.572948455810547, "learning_rate": 2.1387565399020648e-08, "loss": 0.3134, "step": 19367 }, { "epoch": 2.81, "grad_norm": 7.847619533538818, "learning_rate": 2.1355016925628354e-08, "loss": 0.3305, "step": 19368 }, { "epoch": 2.81, "grad_norm": 8.586764335632324, "learning_rate": 2.132249297059596e-08, "loss": 0.3695, "step": 19369 }, { "epoch": 2.81, "grad_norm": 9.779170989990234, "learning_rate": 2.1289993534738482e-08, "loss": 0.3504, "step": 19370 }, { "epoch": 2.81, "grad_norm": 9.847229957580566, "learning_rate": 2.125751861886982e-08, "loss": 0.4298, "step": 19371 }, { "epoch": 2.81, "grad_norm": 8.624520301818848, "learning_rate": 2.1225068223803765e-08, "loss": 0.3932, "step": 19372 }, { "epoch": 2.81, "grad_norm": 8.225221633911133, "learning_rate": 2.119264235035334e-08, "loss": 0.3205, "step": 19373 }, { "epoch": 2.81, "grad_norm": 7.034255027770996, "learning_rate": 2.116024099933078e-08, "loss": 0.283, "step": 19374 }, { "epoch": 2.81, "grad_norm": 8.436832427978516, "learning_rate": 2.1127864171547993e-08, "loss": 0.3485, "step": 19375 }, { "epoch": 2.81, "grad_norm": 9.572644233703613, "learning_rate": 2.1095511867816107e-08, "loss": 0.3856, "step": 19376 }, { "epoch": 2.81, "grad_norm": 7.479979515075684, "learning_rate": 2.1063184088945584e-08, "loss": 0.3075, "step": 19377 }, { "epoch": 2.81, "grad_norm": 8.875731468200684, "learning_rate": 2.1030880835746445e-08, "loss": 0.4188, "step": 19378 }, { "epoch": 2.81, "grad_norm": 7.645060062408447, "learning_rate": 2.0998602109027818e-08, "loss": 0.3173, "step": 19379 }, { "epoch": 2.81, "grad_norm": 8.231444358825684, "learning_rate": 2.0966347909598614e-08, "loss": 0.3327, "step": 19380 }, { "epoch": 2.81, "grad_norm": 7.528254508972168, "learning_rate": 2.0934118238266628e-08, "loss": 0.3252, "step": 19381 }, { "epoch": 2.81, "grad_norm": 10.11182975769043, "learning_rate": 2.0901913095839663e-08, "loss": 0.3908, "step": 19382 }, { "epoch": 2.81, "grad_norm": 7.530398845672607, "learning_rate": 2.0869732483124404e-08, "loss": 0.3058, "step": 19383 }, { "epoch": 2.81, "grad_norm": 8.978252410888672, "learning_rate": 2.0837576400926983e-08, "loss": 0.3226, "step": 19384 }, { "epoch": 2.81, "grad_norm": 8.572709083557129, "learning_rate": 2.080544485005309e-08, "loss": 0.3671, "step": 19385 }, { "epoch": 2.81, "grad_norm": 9.42802619934082, "learning_rate": 2.077333783130786e-08, "loss": 0.4285, "step": 19386 }, { "epoch": 2.81, "grad_norm": 8.4113130569458, "learning_rate": 2.0741255345495533e-08, "loss": 0.3437, "step": 19387 }, { "epoch": 2.81, "grad_norm": 9.48148250579834, "learning_rate": 2.070919739341981e-08, "loss": 0.3417, "step": 19388 }, { "epoch": 2.81, "grad_norm": 8.059615135192871, "learning_rate": 2.0677163975884148e-08, "loss": 0.3157, "step": 19389 }, { "epoch": 2.81, "grad_norm": 7.942187786102295, "learning_rate": 2.06451550936908e-08, "loss": 0.262, "step": 19390 }, { "epoch": 2.81, "grad_norm": 8.464921951293945, "learning_rate": 2.0613170747641795e-08, "loss": 0.3366, "step": 19391 }, { "epoch": 2.81, "grad_norm": 10.238420486450195, "learning_rate": 2.0581210938538596e-08, "loss": 0.3283, "step": 19392 }, { "epoch": 2.81, "grad_norm": 9.615071296691895, "learning_rate": 2.0549275667181564e-08, "loss": 0.3868, "step": 19393 }, { "epoch": 2.81, "grad_norm": 8.121408462524414, "learning_rate": 2.0517364934371174e-08, "loss": 0.3274, "step": 19394 }, { "epoch": 2.81, "grad_norm": 9.40074634552002, "learning_rate": 2.048547874090656e-08, "loss": 0.3413, "step": 19395 }, { "epoch": 2.81, "grad_norm": 8.283782005310059, "learning_rate": 2.045361708758697e-08, "loss": 0.3239, "step": 19396 }, { "epoch": 2.81, "grad_norm": 8.797224044799805, "learning_rate": 2.0421779975210107e-08, "loss": 0.3173, "step": 19397 }, { "epoch": 2.81, "grad_norm": 7.531692981719971, "learning_rate": 2.0389967404573993e-08, "loss": 0.3209, "step": 19398 }, { "epoch": 2.81, "grad_norm": 7.338016510009766, "learning_rate": 2.035817937647577e-08, "loss": 0.293, "step": 19399 }, { "epoch": 2.81, "grad_norm": 9.153239250183105, "learning_rate": 2.0326415891711357e-08, "loss": 0.3955, "step": 19400 }, { "epoch": 2.82, "grad_norm": 7.994222640991211, "learning_rate": 2.0294676951076896e-08, "loss": 0.3209, "step": 19401 }, { "epoch": 2.82, "grad_norm": 10.081145286560059, "learning_rate": 2.0262962555367302e-08, "loss": 0.3791, "step": 19402 }, { "epoch": 2.82, "grad_norm": 9.455415725708008, "learning_rate": 2.02312727053775e-08, "loss": 0.4014, "step": 19403 }, { "epoch": 2.82, "grad_norm": 8.013840675354004, "learning_rate": 2.0199607401901185e-08, "loss": 0.3572, "step": 19404 }, { "epoch": 2.82, "grad_norm": 7.526447772979736, "learning_rate": 2.0167966645731615e-08, "loss": 0.2933, "step": 19405 }, { "epoch": 2.82, "grad_norm": 8.666451454162598, "learning_rate": 2.0136350437661485e-08, "loss": 0.3423, "step": 19406 }, { "epoch": 2.82, "grad_norm": 8.29165267944336, "learning_rate": 2.0104758778483166e-08, "loss": 0.3377, "step": 19407 }, { "epoch": 2.82, "grad_norm": 9.415135383605957, "learning_rate": 2.0073191668987798e-08, "loss": 0.338, "step": 19408 }, { "epoch": 2.82, "grad_norm": 8.599841117858887, "learning_rate": 2.004164910996653e-08, "loss": 0.3581, "step": 19409 }, { "epoch": 2.82, "grad_norm": 9.578269958496094, "learning_rate": 2.0010131102209392e-08, "loss": 0.3926, "step": 19410 }, { "epoch": 2.82, "grad_norm": 7.808862209320068, "learning_rate": 1.9978637646506093e-08, "loss": 0.3604, "step": 19411 }, { "epoch": 2.82, "grad_norm": 9.085715293884277, "learning_rate": 1.994716874364577e-08, "loss": 0.3512, "step": 19412 }, { "epoch": 2.82, "grad_norm": 9.57107925415039, "learning_rate": 1.991572439441669e-08, "loss": 0.3866, "step": 19413 }, { "epoch": 2.82, "grad_norm": 9.569110870361328, "learning_rate": 1.988430459960655e-08, "loss": 0.3685, "step": 19414 }, { "epoch": 2.82, "grad_norm": 9.847317695617676, "learning_rate": 1.9852909360002724e-08, "loss": 0.3871, "step": 19415 }, { "epoch": 2.82, "grad_norm": 8.632491111755371, "learning_rate": 1.982153867639158e-08, "loss": 0.3394, "step": 19416 }, { "epoch": 2.82, "grad_norm": 7.762871742248535, "learning_rate": 1.9790192549559382e-08, "loss": 0.3214, "step": 19417 }, { "epoch": 2.82, "grad_norm": 8.208725929260254, "learning_rate": 1.9758870980291052e-08, "loss": 0.3191, "step": 19418 }, { "epoch": 2.82, "grad_norm": 7.905083656311035, "learning_rate": 1.9727573969371526e-08, "loss": 0.3304, "step": 19419 }, { "epoch": 2.82, "grad_norm": 10.085419654846191, "learning_rate": 1.9696301517584945e-08, "loss": 0.4356, "step": 19420 }, { "epoch": 2.82, "grad_norm": 7.842339038848877, "learning_rate": 1.966505362571458e-08, "loss": 0.305, "step": 19421 }, { "epoch": 2.82, "grad_norm": 8.209704399108887, "learning_rate": 1.9633830294543463e-08, "loss": 0.3126, "step": 19422 }, { "epoch": 2.82, "grad_norm": 9.46609115600586, "learning_rate": 1.9602631524853753e-08, "loss": 0.3972, "step": 19423 }, { "epoch": 2.82, "grad_norm": 9.62923526763916, "learning_rate": 1.9571457317427265e-08, "loss": 0.3766, "step": 19424 }, { "epoch": 2.82, "grad_norm": 8.78805923461914, "learning_rate": 1.954030767304482e-08, "loss": 0.3525, "step": 19425 }, { "epoch": 2.82, "grad_norm": 7.926732063293457, "learning_rate": 1.9509182592486796e-08, "loss": 0.3685, "step": 19426 }, { "epoch": 2.82, "grad_norm": 7.835421085357666, "learning_rate": 1.947808207653312e-08, "loss": 0.3281, "step": 19427 }, { "epoch": 2.82, "grad_norm": 9.025979042053223, "learning_rate": 1.9447006125962952e-08, "loss": 0.3383, "step": 19428 }, { "epoch": 2.82, "grad_norm": 7.515485763549805, "learning_rate": 1.9415954741554884e-08, "loss": 0.3138, "step": 19429 }, { "epoch": 2.82, "grad_norm": 9.833362579345703, "learning_rate": 1.9384927924086636e-08, "loss": 0.3789, "step": 19430 }, { "epoch": 2.82, "grad_norm": 8.740095138549805, "learning_rate": 1.9353925674335802e-08, "loss": 0.3513, "step": 19431 }, { "epoch": 2.82, "grad_norm": 8.43749713897705, "learning_rate": 1.9322947993078874e-08, "loss": 0.3221, "step": 19432 }, { "epoch": 2.82, "grad_norm": 8.763965606689453, "learning_rate": 1.9291994881092122e-08, "loss": 0.3221, "step": 19433 }, { "epoch": 2.82, "grad_norm": 8.79094409942627, "learning_rate": 1.9261066339151033e-08, "loss": 0.3691, "step": 19434 }, { "epoch": 2.82, "grad_norm": 8.535762786865234, "learning_rate": 1.923016236803021e-08, "loss": 0.3799, "step": 19435 }, { "epoch": 2.82, "grad_norm": 8.242321014404297, "learning_rate": 1.919928296850426e-08, "loss": 0.3375, "step": 19436 }, { "epoch": 2.82, "grad_norm": 9.380327224731445, "learning_rate": 1.916842814134656e-08, "loss": 0.3883, "step": 19437 }, { "epoch": 2.82, "grad_norm": 7.77578067779541, "learning_rate": 1.913759788733027e-08, "loss": 0.3379, "step": 19438 }, { "epoch": 2.82, "grad_norm": 9.210453033447266, "learning_rate": 1.9106792207227663e-08, "loss": 0.3404, "step": 19439 }, { "epoch": 2.82, "grad_norm": 8.981411933898926, "learning_rate": 1.9076011101810563e-08, "loss": 0.3264, "step": 19440 }, { "epoch": 2.82, "grad_norm": 9.020145416259766, "learning_rate": 1.9045254571850244e-08, "loss": 0.3993, "step": 19441 }, { "epoch": 2.82, "grad_norm": 8.579163551330566, "learning_rate": 1.9014522618117203e-08, "loss": 0.362, "step": 19442 }, { "epoch": 2.82, "grad_norm": 8.702842712402344, "learning_rate": 1.8983815241381262e-08, "loss": 0.3221, "step": 19443 }, { "epoch": 2.82, "grad_norm": 8.282382011413574, "learning_rate": 1.8953132442411923e-08, "loss": 0.33, "step": 19444 }, { "epoch": 2.82, "grad_norm": 10.966289520263672, "learning_rate": 1.892247422197779e-08, "loss": 0.4577, "step": 19445 }, { "epoch": 2.82, "grad_norm": 9.257390975952148, "learning_rate": 1.889184058084703e-08, "loss": 0.3652, "step": 19446 }, { "epoch": 2.82, "grad_norm": 8.658434867858887, "learning_rate": 1.8861231519787026e-08, "loss": 0.351, "step": 19447 }, { "epoch": 2.82, "grad_norm": 8.260351181030273, "learning_rate": 1.883064703956472e-08, "loss": 0.3249, "step": 19448 }, { "epoch": 2.82, "grad_norm": 8.564602851867676, "learning_rate": 1.8800087140946164e-08, "loss": 0.3674, "step": 19449 }, { "epoch": 2.82, "grad_norm": 8.754434585571289, "learning_rate": 1.8769551824697305e-08, "loss": 0.34, "step": 19450 }, { "epoch": 2.82, "grad_norm": 8.347691535949707, "learning_rate": 1.8739041091582975e-08, "loss": 0.362, "step": 19451 }, { "epoch": 2.82, "grad_norm": 8.369412422180176, "learning_rate": 1.870855494236756e-08, "loss": 0.3301, "step": 19452 }, { "epoch": 2.82, "grad_norm": 7.9126434326171875, "learning_rate": 1.8678093377814897e-08, "loss": 0.2952, "step": 19453 }, { "epoch": 2.82, "grad_norm": 9.205344200134277, "learning_rate": 1.8647656398688038e-08, "loss": 0.3587, "step": 19454 }, { "epoch": 2.82, "grad_norm": 10.760086059570312, "learning_rate": 1.861724400574982e-08, "loss": 0.3406, "step": 19455 }, { "epoch": 2.82, "grad_norm": 10.372980117797852, "learning_rate": 1.8586856199761746e-08, "loss": 0.3622, "step": 19456 }, { "epoch": 2.82, "grad_norm": 8.44664478302002, "learning_rate": 1.855649298148554e-08, "loss": 0.3254, "step": 19457 }, { "epoch": 2.82, "grad_norm": 9.012566566467285, "learning_rate": 1.8526154351681478e-08, "loss": 0.4456, "step": 19458 }, { "epoch": 2.82, "grad_norm": 8.721940040588379, "learning_rate": 1.8495840311110177e-08, "loss": 0.3509, "step": 19459 }, { "epoch": 2.82, "grad_norm": 8.733683586120605, "learning_rate": 1.84655508605307e-08, "loss": 0.3632, "step": 19460 }, { "epoch": 2.82, "grad_norm": 8.217717170715332, "learning_rate": 1.843528600070199e-08, "loss": 0.3786, "step": 19461 }, { "epoch": 2.82, "grad_norm": 8.166173934936523, "learning_rate": 1.840504573238233e-08, "loss": 0.3344, "step": 19462 }, { "epoch": 2.82, "grad_norm": 8.624582290649414, "learning_rate": 1.8374830056329338e-08, "loss": 0.3507, "step": 19463 }, { "epoch": 2.82, "grad_norm": 8.13012981414795, "learning_rate": 1.8344638973299964e-08, "loss": 0.3407, "step": 19464 }, { "epoch": 2.82, "grad_norm": 8.195684432983398, "learning_rate": 1.8314472484050715e-08, "loss": 0.3626, "step": 19465 }, { "epoch": 2.82, "grad_norm": 9.370802879333496, "learning_rate": 1.8284330589337317e-08, "loss": 0.4073, "step": 19466 }, { "epoch": 2.82, "grad_norm": 8.468256950378418, "learning_rate": 1.8254213289914722e-08, "loss": 0.361, "step": 19467 }, { "epoch": 2.82, "grad_norm": 8.630218505859375, "learning_rate": 1.822412058653777e-08, "loss": 0.3638, "step": 19468 }, { "epoch": 2.82, "grad_norm": 9.878926277160645, "learning_rate": 1.8194052479960308e-08, "loss": 0.4113, "step": 19469 }, { "epoch": 2.83, "grad_norm": 8.79143238067627, "learning_rate": 1.8164008970935506e-08, "loss": 0.3005, "step": 19470 }, { "epoch": 2.83, "grad_norm": 8.063788414001465, "learning_rate": 1.813399006021621e-08, "loss": 0.3646, "step": 19471 }, { "epoch": 2.83, "grad_norm": 8.579154014587402, "learning_rate": 1.8103995748554256e-08, "loss": 0.3886, "step": 19472 }, { "epoch": 2.83, "grad_norm": 7.864823341369629, "learning_rate": 1.8074026036701496e-08, "loss": 0.316, "step": 19473 }, { "epoch": 2.83, "grad_norm": 9.299617767333984, "learning_rate": 1.8044080925408434e-08, "loss": 0.3698, "step": 19474 }, { "epoch": 2.83, "grad_norm": 8.21839714050293, "learning_rate": 1.801416041542536e-08, "loss": 0.3428, "step": 19475 }, { "epoch": 2.83, "grad_norm": 7.719982624053955, "learning_rate": 1.7984264507502122e-08, "loss": 0.2998, "step": 19476 }, { "epoch": 2.83, "grad_norm": 7.769845008850098, "learning_rate": 1.7954393202387342e-08, "loss": 0.3304, "step": 19477 }, { "epoch": 2.83, "grad_norm": 9.374907493591309, "learning_rate": 1.792454650082975e-08, "loss": 0.4019, "step": 19478 }, { "epoch": 2.83, "grad_norm": 8.627175331115723, "learning_rate": 1.7894724403576756e-08, "loss": 0.37, "step": 19479 }, { "epoch": 2.83, "grad_norm": 8.953932762145996, "learning_rate": 1.7864926911375867e-08, "loss": 0.3872, "step": 19480 }, { "epoch": 2.83, "grad_norm": 8.191045761108398, "learning_rate": 1.7835154024973377e-08, "loss": 0.3932, "step": 19481 }, { "epoch": 2.83, "grad_norm": 8.599886894226074, "learning_rate": 1.7805405745115243e-08, "loss": 0.2938, "step": 19482 }, { "epoch": 2.83, "grad_norm": 7.759550094604492, "learning_rate": 1.777568207254676e-08, "loss": 0.3533, "step": 19483 }, { "epoch": 2.83, "grad_norm": 7.9159979820251465, "learning_rate": 1.7745983008012555e-08, "loss": 0.3056, "step": 19484 }, { "epoch": 2.83, "grad_norm": 7.887180328369141, "learning_rate": 1.771630855225681e-08, "loss": 0.3361, "step": 19485 }, { "epoch": 2.83, "grad_norm": 8.294954299926758, "learning_rate": 1.7686658706022927e-08, "loss": 0.3355, "step": 19486 }, { "epoch": 2.83, "grad_norm": 7.870232582092285, "learning_rate": 1.765703347005365e-08, "loss": 0.2913, "step": 19487 }, { "epoch": 2.83, "grad_norm": 9.015034675598145, "learning_rate": 1.762743284509127e-08, "loss": 0.4066, "step": 19488 }, { "epoch": 2.83, "grad_norm": 8.627394676208496, "learning_rate": 1.7597856831877424e-08, "loss": 0.3938, "step": 19489 }, { "epoch": 2.83, "grad_norm": 7.777210235595703, "learning_rate": 1.7568305431153174e-08, "loss": 0.3582, "step": 19490 }, { "epoch": 2.83, "grad_norm": 8.047679901123047, "learning_rate": 1.753877864365849e-08, "loss": 0.3194, "step": 19491 }, { "epoch": 2.83, "grad_norm": 8.716729164123535, "learning_rate": 1.750927647013356e-08, "loss": 0.3346, "step": 19492 }, { "epoch": 2.83, "grad_norm": 8.231671333312988, "learning_rate": 1.7479798911317123e-08, "loss": 0.3579, "step": 19493 }, { "epoch": 2.83, "grad_norm": 8.655953407287598, "learning_rate": 1.7450345967948143e-08, "loss": 0.3121, "step": 19494 }, { "epoch": 2.83, "grad_norm": 8.888057708740234, "learning_rate": 1.7420917640764143e-08, "loss": 0.3267, "step": 19495 }, { "epoch": 2.83, "grad_norm": 8.145492553710938, "learning_rate": 1.7391513930502533e-08, "loss": 0.3387, "step": 19496 }, { "epoch": 2.83, "grad_norm": 8.456180572509766, "learning_rate": 1.7362134837899943e-08, "loss": 0.335, "step": 19497 }, { "epoch": 2.83, "grad_norm": 8.767362594604492, "learning_rate": 1.7332780363692568e-08, "loss": 0.37, "step": 19498 }, { "epoch": 2.83, "grad_norm": 8.642677307128906, "learning_rate": 1.730345050861559e-08, "loss": 0.3521, "step": 19499 }, { "epoch": 2.83, "grad_norm": 9.350149154663086, "learning_rate": 1.7274145273403984e-08, "loss": 0.3792, "step": 19500 }, { "epoch": 2.83, "grad_norm": 8.187827110290527, "learning_rate": 1.7244864658791935e-08, "loss": 0.3334, "step": 19501 }, { "epoch": 2.83, "grad_norm": 7.527706623077393, "learning_rate": 1.721560866551286e-08, "loss": 0.3124, "step": 19502 }, { "epoch": 2.83, "grad_norm": 10.189835548400879, "learning_rate": 1.718637729430006e-08, "loss": 0.3869, "step": 19503 }, { "epoch": 2.83, "grad_norm": 8.974985122680664, "learning_rate": 1.71571705458855e-08, "loss": 0.374, "step": 19504 }, { "epoch": 2.83, "grad_norm": 8.443480491638184, "learning_rate": 1.7127988421001162e-08, "loss": 0.3029, "step": 19505 }, { "epoch": 2.83, "grad_norm": 8.261285781860352, "learning_rate": 1.7098830920378005e-08, "loss": 0.3621, "step": 19506 }, { "epoch": 2.83, "grad_norm": 9.393730163574219, "learning_rate": 1.7069698044746672e-08, "loss": 0.315, "step": 19507 }, { "epoch": 2.83, "grad_norm": 8.619664192199707, "learning_rate": 1.7040589794836912e-08, "loss": 0.3409, "step": 19508 }, { "epoch": 2.83, "grad_norm": 9.878761291503906, "learning_rate": 1.7011506171378143e-08, "loss": 0.3921, "step": 19509 }, { "epoch": 2.83, "grad_norm": 8.249547958374023, "learning_rate": 1.698244717509878e-08, "loss": 0.3096, "step": 19510 }, { "epoch": 2.83, "grad_norm": 7.968673229217529, "learning_rate": 1.695341280672713e-08, "loss": 0.3611, "step": 19511 }, { "epoch": 2.83, "grad_norm": 9.404989242553711, "learning_rate": 1.6924403066990168e-08, "loss": 0.3625, "step": 19512 }, { "epoch": 2.83, "grad_norm": 8.540459632873535, "learning_rate": 1.68954179566152e-08, "loss": 0.3447, "step": 19513 }, { "epoch": 2.83, "grad_norm": 9.189234733581543, "learning_rate": 1.686645747632798e-08, "loss": 0.3392, "step": 19514 }, { "epoch": 2.83, "grad_norm": 7.089536190032959, "learning_rate": 1.6837521626854367e-08, "loss": 0.3447, "step": 19515 }, { "epoch": 2.83, "grad_norm": 9.171977043151855, "learning_rate": 1.680861040891912e-08, "loss": 0.3392, "step": 19516 }, { "epoch": 2.83, "grad_norm": 7.995136737823486, "learning_rate": 1.677972382324666e-08, "loss": 0.3017, "step": 19517 }, { "epoch": 2.83, "grad_norm": 10.96194839477539, "learning_rate": 1.6750861870560628e-08, "loss": 0.3875, "step": 19518 }, { "epoch": 2.83, "grad_norm": 8.419840812683105, "learning_rate": 1.6722024551584113e-08, "loss": 0.3455, "step": 19519 }, { "epoch": 2.83, "grad_norm": 11.36938762664795, "learning_rate": 1.6693211867039648e-08, "loss": 0.4309, "step": 19520 }, { "epoch": 2.83, "grad_norm": 8.182859420776367, "learning_rate": 1.66644238176491e-08, "loss": 0.3033, "step": 19521 }, { "epoch": 2.83, "grad_norm": 8.295427322387695, "learning_rate": 1.6635660404133556e-08, "loss": 0.3774, "step": 19522 }, { "epoch": 2.83, "grad_norm": 7.489344120025635, "learning_rate": 1.6606921627213778e-08, "loss": 0.296, "step": 19523 }, { "epoch": 2.83, "grad_norm": 8.908000946044922, "learning_rate": 1.657820748760974e-08, "loss": 0.4149, "step": 19524 }, { "epoch": 2.83, "grad_norm": 9.029478073120117, "learning_rate": 1.6549517986040984e-08, "loss": 0.378, "step": 19525 }, { "epoch": 2.83, "grad_norm": 7.5192670822143555, "learning_rate": 1.652085312322582e-08, "loss": 0.3318, "step": 19526 }, { "epoch": 2.83, "grad_norm": 9.012491226196289, "learning_rate": 1.6492212899882894e-08, "loss": 0.3353, "step": 19527 }, { "epoch": 2.83, "grad_norm": 8.338065147399902, "learning_rate": 1.6463597316729417e-08, "loss": 0.3472, "step": 19528 }, { "epoch": 2.83, "grad_norm": 8.192874908447266, "learning_rate": 1.6435006374482587e-08, "loss": 0.3382, "step": 19529 }, { "epoch": 2.83, "grad_norm": 9.39719295501709, "learning_rate": 1.6406440073858496e-08, "loss": 0.3107, "step": 19530 }, { "epoch": 2.83, "grad_norm": 8.750813484191895, "learning_rate": 1.63778984155728e-08, "loss": 0.3354, "step": 19531 }, { "epoch": 2.83, "grad_norm": 9.601614952087402, "learning_rate": 1.63493814003407e-08, "loss": 0.4278, "step": 19532 }, { "epoch": 2.83, "grad_norm": 8.804248809814453, "learning_rate": 1.6320889028876517e-08, "loss": 0.3468, "step": 19533 }, { "epoch": 2.83, "grad_norm": 8.012954711914062, "learning_rate": 1.6292421301894232e-08, "loss": 0.3486, "step": 19534 }, { "epoch": 2.83, "grad_norm": 8.668569564819336, "learning_rate": 1.6263978220106943e-08, "loss": 0.384, "step": 19535 }, { "epoch": 2.83, "grad_norm": 7.906814098358154, "learning_rate": 1.6235559784227303e-08, "loss": 0.3284, "step": 19536 }, { "epoch": 2.83, "grad_norm": 9.352377891540527, "learning_rate": 1.6207165994967298e-08, "loss": 0.3544, "step": 19537 }, { "epoch": 2.83, "grad_norm": 8.630887031555176, "learning_rate": 1.6178796853038136e-08, "loss": 0.364, "step": 19538 }, { "epoch": 2.84, "grad_norm": 9.174053192138672, "learning_rate": 1.615045235915069e-08, "loss": 0.3535, "step": 19539 }, { "epoch": 2.84, "grad_norm": 8.773895263671875, "learning_rate": 1.6122132514015174e-08, "loss": 0.3508, "step": 19540 }, { "epoch": 2.84, "grad_norm": 8.694759368896484, "learning_rate": 1.6093837318340908e-08, "loss": 0.3506, "step": 19541 }, { "epoch": 2.84, "grad_norm": 9.013431549072266, "learning_rate": 1.606556677283688e-08, "loss": 0.3715, "step": 19542 }, { "epoch": 2.84, "grad_norm": 8.854832649230957, "learning_rate": 1.6037320878211412e-08, "loss": 0.396, "step": 19543 }, { "epoch": 2.84, "grad_norm": 10.504822731018066, "learning_rate": 1.600909963517205e-08, "loss": 0.4421, "step": 19544 }, { "epoch": 2.84, "grad_norm": 9.332540512084961, "learning_rate": 1.5980903044425897e-08, "loss": 0.3635, "step": 19545 }, { "epoch": 2.84, "grad_norm": 7.998968601226807, "learning_rate": 1.5952731106679383e-08, "loss": 0.3389, "step": 19546 }, { "epoch": 2.84, "grad_norm": 11.840102195739746, "learning_rate": 1.592458382263817e-08, "loss": 0.3299, "step": 19547 }, { "epoch": 2.84, "grad_norm": 7.859742164611816, "learning_rate": 1.5896461193007694e-08, "loss": 0.3644, "step": 19548 }, { "epoch": 2.84, "grad_norm": 9.119901657104492, "learning_rate": 1.5868363218492275e-08, "loss": 0.3628, "step": 19549 }, { "epoch": 2.84, "grad_norm": 8.422849655151367, "learning_rate": 1.5840289899796024e-08, "loss": 0.3256, "step": 19550 }, { "epoch": 2.84, "grad_norm": 7.7922587394714355, "learning_rate": 1.581224123762226e-08, "loss": 0.3046, "step": 19551 }, { "epoch": 2.84, "grad_norm": 9.448640823364258, "learning_rate": 1.5784217232673536e-08, "loss": 0.3743, "step": 19552 }, { "epoch": 2.84, "grad_norm": 8.91761302947998, "learning_rate": 1.575621788565218e-08, "loss": 0.3417, "step": 19553 }, { "epoch": 2.84, "grad_norm": 10.267961502075195, "learning_rate": 1.5728243197259407e-08, "loss": 0.3496, "step": 19554 }, { "epoch": 2.84, "grad_norm": 8.175307273864746, "learning_rate": 1.5700293168196433e-08, "loss": 0.3205, "step": 19555 }, { "epoch": 2.84, "grad_norm": 7.9302520751953125, "learning_rate": 1.5672367799163146e-08, "loss": 0.3352, "step": 19556 }, { "epoch": 2.84, "grad_norm": 8.737143516540527, "learning_rate": 1.5644467090859425e-08, "loss": 0.3575, "step": 19557 }, { "epoch": 2.84, "grad_norm": 8.56033706665039, "learning_rate": 1.561659104398405e-08, "loss": 0.338, "step": 19558 }, { "epoch": 2.84, "grad_norm": 8.148853302001953, "learning_rate": 1.5588739659235573e-08, "loss": 0.3243, "step": 19559 }, { "epoch": 2.84, "grad_norm": 10.30615234375, "learning_rate": 1.5560912937311765e-08, "loss": 0.4167, "step": 19560 }, { "epoch": 2.84, "grad_norm": 8.636122703552246, "learning_rate": 1.5533110878909738e-08, "loss": 0.3314, "step": 19561 }, { "epoch": 2.84, "grad_norm": 9.215556144714355, "learning_rate": 1.5505333484726045e-08, "loss": 0.3424, "step": 19562 }, { "epoch": 2.84, "grad_norm": 7.6725263595581055, "learning_rate": 1.5477580755456466e-08, "loss": 0.3105, "step": 19563 }, { "epoch": 2.84, "grad_norm": 7.853243350982666, "learning_rate": 1.544985269179655e-08, "loss": 0.3357, "step": 19564 }, { "epoch": 2.84, "grad_norm": 7.3217878341674805, "learning_rate": 1.5422149294440857e-08, "loss": 0.3138, "step": 19565 }, { "epoch": 2.84, "grad_norm": 7.930241107940674, "learning_rate": 1.5394470564083272e-08, "loss": 0.3179, "step": 19566 }, { "epoch": 2.84, "grad_norm": 9.783153533935547, "learning_rate": 1.536681650141758e-08, "loss": 0.372, "step": 19567 }, { "epoch": 2.84, "grad_norm": 8.951286315917969, "learning_rate": 1.533918710713633e-08, "loss": 0.3276, "step": 19568 }, { "epoch": 2.84, "grad_norm": 8.443623542785645, "learning_rate": 1.5311582381931974e-08, "loss": 0.3561, "step": 19569 }, { "epoch": 2.84, "grad_norm": 8.732263565063477, "learning_rate": 1.5284002326495848e-08, "loss": 0.3072, "step": 19570 }, { "epoch": 2.84, "grad_norm": 8.225095748901367, "learning_rate": 1.5256446941519176e-08, "loss": 0.3515, "step": 19571 }, { "epoch": 2.84, "grad_norm": 8.099969863891602, "learning_rate": 1.5228916227692068e-08, "loss": 0.3134, "step": 19572 }, { "epoch": 2.84, "grad_norm": 8.572863578796387, "learning_rate": 1.5201410185704423e-08, "loss": 0.313, "step": 19573 }, { "epoch": 2.84, "grad_norm": 8.937801361083984, "learning_rate": 1.5173928816245352e-08, "loss": 0.3573, "step": 19574 }, { "epoch": 2.84, "grad_norm": 8.980063438415527, "learning_rate": 1.5146472120003195e-08, "loss": 0.3384, "step": 19575 }, { "epoch": 2.84, "grad_norm": 9.5789155960083, "learning_rate": 1.511904009766607e-08, "loss": 0.3969, "step": 19576 }, { "epoch": 2.84, "grad_norm": 7.161252975463867, "learning_rate": 1.5091632749921092e-08, "loss": 0.292, "step": 19577 }, { "epoch": 2.84, "grad_norm": 9.171738624572754, "learning_rate": 1.506425007745493e-08, "loss": 0.3519, "step": 19578 }, { "epoch": 2.84, "grad_norm": 9.303423881530762, "learning_rate": 1.503689208095371e-08, "loss": 0.3825, "step": 19579 }, { "epoch": 2.84, "grad_norm": 8.171374320983887, "learning_rate": 1.5009558761102657e-08, "loss": 0.3375, "step": 19580 }, { "epoch": 2.84, "grad_norm": 8.185816764831543, "learning_rate": 1.498225011858678e-08, "loss": 0.3496, "step": 19581 }, { "epoch": 2.84, "grad_norm": 8.239447593688965, "learning_rate": 1.4954966154090086e-08, "loss": 0.3991, "step": 19582 }, { "epoch": 2.84, "grad_norm": 8.392770767211914, "learning_rate": 1.4927706868296143e-08, "loss": 0.3378, "step": 19583 }, { "epoch": 2.84, "grad_norm": 9.09199047088623, "learning_rate": 1.4900472261887954e-08, "loss": 0.3403, "step": 19584 }, { "epoch": 2.84, "grad_norm": 10.133056640625, "learning_rate": 1.4873262335547865e-08, "loss": 0.4015, "step": 19585 }, { "epoch": 2.84, "grad_norm": 7.8502984046936035, "learning_rate": 1.4846077089957553e-08, "loss": 0.3385, "step": 19586 }, { "epoch": 2.84, "grad_norm": 8.500205993652344, "learning_rate": 1.481891652579792e-08, "loss": 0.3281, "step": 19587 }, { "epoch": 2.84, "grad_norm": 9.844616889953613, "learning_rate": 1.4791780643749752e-08, "loss": 0.3866, "step": 19588 }, { "epoch": 2.84, "grad_norm": 7.7499308586120605, "learning_rate": 1.4764669444492616e-08, "loss": 0.3148, "step": 19589 }, { "epoch": 2.84, "grad_norm": 8.255749702453613, "learning_rate": 1.4737582928705861e-08, "loss": 0.3533, "step": 19590 }, { "epoch": 2.84, "grad_norm": 8.804388046264648, "learning_rate": 1.4710521097068051e-08, "loss": 0.3746, "step": 19591 }, { "epoch": 2.84, "grad_norm": 10.041297912597656, "learning_rate": 1.4683483950257314e-08, "loss": 0.4039, "step": 19592 }, { "epoch": 2.84, "grad_norm": 7.652607440948486, "learning_rate": 1.4656471488950884e-08, "loss": 0.3292, "step": 19593 }, { "epoch": 2.84, "grad_norm": 10.408729553222656, "learning_rate": 1.4629483713825552e-08, "loss": 0.2833, "step": 19594 }, { "epoch": 2.84, "grad_norm": 10.046892166137695, "learning_rate": 1.4602520625557336e-08, "loss": 0.348, "step": 19595 }, { "epoch": 2.84, "grad_norm": 7.896914958953857, "learning_rate": 1.4575582224822024e-08, "loss": 0.3031, "step": 19596 }, { "epoch": 2.84, "grad_norm": 8.843230247497559, "learning_rate": 1.4548668512294194e-08, "loss": 0.3319, "step": 19597 }, { "epoch": 2.84, "grad_norm": 9.380071640014648, "learning_rate": 1.4521779488648412e-08, "loss": 0.3312, "step": 19598 }, { "epoch": 2.84, "grad_norm": 9.461359024047852, "learning_rate": 1.4494915154558141e-08, "loss": 0.3262, "step": 19599 }, { "epoch": 2.84, "grad_norm": 8.582756042480469, "learning_rate": 1.446807551069651e-08, "loss": 0.3407, "step": 19600 }, { "epoch": 2.84, "grad_norm": 8.077813148498535, "learning_rate": 1.4441260557735869e-08, "loss": 0.3313, "step": 19601 }, { "epoch": 2.84, "grad_norm": 7.836039066314697, "learning_rate": 1.4414470296348236e-08, "loss": 0.3188, "step": 19602 }, { "epoch": 2.84, "grad_norm": 8.90456485748291, "learning_rate": 1.4387704727204519e-08, "loss": 0.3615, "step": 19603 }, { "epoch": 2.84, "grad_norm": 9.38117790222168, "learning_rate": 1.4360963850975515e-08, "loss": 0.363, "step": 19604 }, { "epoch": 2.84, "grad_norm": 8.553450584411621, "learning_rate": 1.4334247668330913e-08, "loss": 0.327, "step": 19605 }, { "epoch": 2.84, "grad_norm": 7.271415710449219, "learning_rate": 1.4307556179940173e-08, "loss": 0.3128, "step": 19606 }, { "epoch": 2.84, "grad_norm": 8.342082977294922, "learning_rate": 1.4280889386472206e-08, "loss": 0.3366, "step": 19607 }, { "epoch": 2.85, "grad_norm": 7.836815357208252, "learning_rate": 1.4254247288594811e-08, "loss": 0.3533, "step": 19608 }, { "epoch": 2.85, "grad_norm": 7.641151428222656, "learning_rate": 1.4227629886975679e-08, "loss": 0.3323, "step": 19609 }, { "epoch": 2.85, "grad_norm": 7.694514274597168, "learning_rate": 1.4201037182281495e-08, "loss": 0.3275, "step": 19610 }, { "epoch": 2.85, "grad_norm": 7.685854434967041, "learning_rate": 1.4174469175178617e-08, "loss": 0.3163, "step": 19611 }, { "epoch": 2.85, "grad_norm": 8.8115873336792, "learning_rate": 1.4147925866332622e-08, "loss": 0.3355, "step": 19612 }, { "epoch": 2.85, "grad_norm": 9.259930610656738, "learning_rate": 1.4121407256408424e-08, "loss": 0.3386, "step": 19613 }, { "epoch": 2.85, "grad_norm": 9.477728843688965, "learning_rate": 1.4094913346070491e-08, "loss": 0.3323, "step": 19614 }, { "epoch": 2.85, "grad_norm": 9.590536117553711, "learning_rate": 1.4068444135982626e-08, "loss": 0.4271, "step": 19615 }, { "epoch": 2.85, "grad_norm": 9.137503623962402, "learning_rate": 1.4041999626807965e-08, "loss": 0.3993, "step": 19616 }, { "epoch": 2.85, "grad_norm": 7.90793514251709, "learning_rate": 1.4015579819208867e-08, "loss": 0.33, "step": 19617 }, { "epoch": 2.85, "grad_norm": 10.543384552001953, "learning_rate": 1.3989184713847468e-08, "loss": 0.33, "step": 19618 }, { "epoch": 2.85, "grad_norm": 8.608809471130371, "learning_rate": 1.3962814311384908e-08, "loss": 0.3401, "step": 19619 }, { "epoch": 2.85, "grad_norm": 8.043418884277344, "learning_rate": 1.3936468612481878e-08, "loss": 0.351, "step": 19620 }, { "epoch": 2.85, "grad_norm": 8.066728591918945, "learning_rate": 1.3910147617798517e-08, "loss": 0.3291, "step": 19621 }, { "epoch": 2.85, "grad_norm": 7.878907203674316, "learning_rate": 1.3883851327994078e-08, "loss": 0.34, "step": 19622 }, { "epoch": 2.85, "grad_norm": 8.394251823425293, "learning_rate": 1.3857579743727587e-08, "loss": 0.3594, "step": 19623 }, { "epoch": 2.85, "grad_norm": 8.935673713684082, "learning_rate": 1.3831332865656963e-08, "loss": 0.3606, "step": 19624 }, { "epoch": 2.85, "grad_norm": 9.887530326843262, "learning_rate": 1.3805110694440124e-08, "loss": 0.3988, "step": 19625 }, { "epoch": 2.85, "grad_norm": 8.655119895935059, "learning_rate": 1.3778913230733657e-08, "loss": 0.3502, "step": 19626 }, { "epoch": 2.85, "grad_norm": 8.356966018676758, "learning_rate": 1.3752740475194257e-08, "loss": 0.3799, "step": 19627 }, { "epoch": 2.85, "grad_norm": 8.165138244628906, "learning_rate": 1.3726592428477291e-08, "loss": 0.3527, "step": 19628 }, { "epoch": 2.85, "grad_norm": 9.06029987335205, "learning_rate": 1.370046909123812e-08, "loss": 0.3784, "step": 19629 }, { "epoch": 2.85, "grad_norm": 7.739502906799316, "learning_rate": 1.3674370464131114e-08, "loss": 0.3217, "step": 19630 }, { "epoch": 2.85, "grad_norm": 7.540474891662598, "learning_rate": 1.364829654781008e-08, "loss": 0.3079, "step": 19631 }, { "epoch": 2.85, "grad_norm": 7.740227699279785, "learning_rate": 1.3622247342928383e-08, "loss": 0.3444, "step": 19632 }, { "epoch": 2.85, "grad_norm": 10.166059494018555, "learning_rate": 1.3596222850138616e-08, "loss": 0.4681, "step": 19633 }, { "epoch": 2.85, "grad_norm": 8.535499572753906, "learning_rate": 1.3570223070092701e-08, "loss": 0.3228, "step": 19634 }, { "epoch": 2.85, "grad_norm": 7.75351619720459, "learning_rate": 1.3544248003442115e-08, "loss": 0.309, "step": 19635 }, { "epoch": 2.85, "grad_norm": 9.228791236877441, "learning_rate": 1.3518297650837562e-08, "loss": 0.3714, "step": 19636 }, { "epoch": 2.85, "grad_norm": 8.385298728942871, "learning_rate": 1.3492372012929187e-08, "loss": 0.3481, "step": 19637 }, { "epoch": 2.85, "grad_norm": 8.522384643554688, "learning_rate": 1.3466471090366472e-08, "loss": 0.3316, "step": 19638 }, { "epoch": 2.85, "grad_norm": 8.6923828125, "learning_rate": 1.3440594883798451e-08, "loss": 0.3545, "step": 19639 }, { "epoch": 2.85, "grad_norm": 10.153271675109863, "learning_rate": 1.3414743393873274e-08, "loss": 0.3892, "step": 19640 }, { "epoch": 2.85, "grad_norm": 8.589892387390137, "learning_rate": 1.3388916621238645e-08, "loss": 0.3153, "step": 19641 }, { "epoch": 2.85, "grad_norm": 7.483738899230957, "learning_rate": 1.3363114566541822e-08, "loss": 0.2866, "step": 19642 }, { "epoch": 2.85, "grad_norm": 8.580484390258789, "learning_rate": 1.3337337230428736e-08, "loss": 0.3746, "step": 19643 }, { "epoch": 2.85, "grad_norm": 9.768728256225586, "learning_rate": 1.3311584613545756e-08, "loss": 0.4025, "step": 19644 }, { "epoch": 2.85, "grad_norm": 8.188042640686035, "learning_rate": 1.3285856716537703e-08, "loss": 0.3359, "step": 19645 }, { "epoch": 2.85, "grad_norm": 9.087270736694336, "learning_rate": 1.326015354004939e-08, "loss": 0.391, "step": 19646 }, { "epoch": 2.85, "grad_norm": 8.724813461303711, "learning_rate": 1.3234475084724527e-08, "loss": 0.3393, "step": 19647 }, { "epoch": 2.85, "grad_norm": 9.85452938079834, "learning_rate": 1.3208821351206712e-08, "loss": 0.3831, "step": 19648 }, { "epoch": 2.85, "grad_norm": 8.621203422546387, "learning_rate": 1.3183192340138315e-08, "loss": 0.3322, "step": 19649 }, { "epoch": 2.85, "grad_norm": 8.620756149291992, "learning_rate": 1.3157588052161717e-08, "loss": 0.3298, "step": 19650 }, { "epoch": 2.85, "grad_norm": 8.46997356414795, "learning_rate": 1.3132008487918288e-08, "loss": 0.313, "step": 19651 }, { "epoch": 2.85, "grad_norm": 7.7002387046813965, "learning_rate": 1.3106453648048965e-08, "loss": 0.3329, "step": 19652 }, { "epoch": 2.85, "grad_norm": 8.508271217346191, "learning_rate": 1.30809235331939e-08, "loss": 0.382, "step": 19653 }, { "epoch": 2.85, "grad_norm": 9.59500503540039, "learning_rate": 1.3055418143992581e-08, "loss": 0.4099, "step": 19654 }, { "epoch": 2.85, "grad_norm": 9.116886138916016, "learning_rate": 1.3029937481084275e-08, "loss": 0.3901, "step": 19655 }, { "epoch": 2.85, "grad_norm": 8.772146224975586, "learning_rate": 1.3004481545107249e-08, "loss": 0.3032, "step": 19656 }, { "epoch": 2.85, "grad_norm": 7.967455863952637, "learning_rate": 1.2979050336699105e-08, "loss": 0.3167, "step": 19657 }, { "epoch": 2.85, "grad_norm": 9.854512214660645, "learning_rate": 1.295364385649722e-08, "loss": 0.4268, "step": 19658 }, { "epoch": 2.85, "grad_norm": 7.833713531494141, "learning_rate": 1.2928262105137977e-08, "loss": 0.3114, "step": 19659 }, { "epoch": 2.85, "grad_norm": 9.654668807983398, "learning_rate": 1.290290508325731e-08, "loss": 0.3649, "step": 19660 }, { "epoch": 2.85, "grad_norm": 7.363293170928955, "learning_rate": 1.2877572791490488e-08, "loss": 0.3124, "step": 19661 }, { "epoch": 2.85, "grad_norm": 10.29638671875, "learning_rate": 1.2852265230472115e-08, "loss": 0.4144, "step": 19662 }, { "epoch": 2.85, "grad_norm": 8.348615646362305, "learning_rate": 1.282698240083646e-08, "loss": 0.3688, "step": 19663 }, { "epoch": 2.85, "grad_norm": 8.525075912475586, "learning_rate": 1.2801724303216577e-08, "loss": 0.3414, "step": 19664 }, { "epoch": 2.85, "grad_norm": 10.021038055419922, "learning_rate": 1.2776490938245621e-08, "loss": 0.4119, "step": 19665 }, { "epoch": 2.85, "grad_norm": 9.452499389648438, "learning_rate": 1.2751282306555421e-08, "loss": 0.4086, "step": 19666 }, { "epoch": 2.85, "grad_norm": 8.82833480834961, "learning_rate": 1.2726098408777919e-08, "loss": 0.3122, "step": 19667 }, { "epoch": 2.85, "grad_norm": 8.322757720947266, "learning_rate": 1.270093924554383e-08, "loss": 0.3688, "step": 19668 }, { "epoch": 2.85, "grad_norm": 7.9078497886657715, "learning_rate": 1.2675804817483537e-08, "loss": 0.3617, "step": 19669 }, { "epoch": 2.85, "grad_norm": 9.686027526855469, "learning_rate": 1.2650695125226652e-08, "loss": 0.3684, "step": 19670 }, { "epoch": 2.85, "grad_norm": 7.816746711730957, "learning_rate": 1.2625610169402334e-08, "loss": 0.3292, "step": 19671 }, { "epoch": 2.85, "grad_norm": 8.14000129699707, "learning_rate": 1.2600549950639084e-08, "loss": 0.3215, "step": 19672 }, { "epoch": 2.85, "grad_norm": 7.854248046875, "learning_rate": 1.257551446956462e-08, "loss": 0.2961, "step": 19673 }, { "epoch": 2.85, "grad_norm": 8.225305557250977, "learning_rate": 1.2550503726806328e-08, "loss": 0.3386, "step": 19674 }, { "epoch": 2.85, "grad_norm": 7.60124397277832, "learning_rate": 1.2525517722990708e-08, "loss": 0.3327, "step": 19675 }, { "epoch": 2.85, "grad_norm": 9.71646785736084, "learning_rate": 1.2500556458743705e-08, "loss": 0.3798, "step": 19676 }, { "epoch": 2.86, "grad_norm": 8.099305152893066, "learning_rate": 1.2475619934690818e-08, "loss": 0.2967, "step": 19677 }, { "epoch": 2.86, "grad_norm": 8.863773345947266, "learning_rate": 1.2450708151456657e-08, "loss": 0.2793, "step": 19678 }, { "epoch": 2.86, "grad_norm": 8.855713844299316, "learning_rate": 1.2425821109665502e-08, "loss": 0.4349, "step": 19679 }, { "epoch": 2.86, "grad_norm": 8.296682357788086, "learning_rate": 1.2400958809940631e-08, "loss": 0.3226, "step": 19680 }, { "epoch": 2.86, "grad_norm": 8.527205467224121, "learning_rate": 1.2376121252905215e-08, "loss": 0.3504, "step": 19681 }, { "epoch": 2.86, "grad_norm": 6.915406227111816, "learning_rate": 1.2351308439181086e-08, "loss": 0.3364, "step": 19682 }, { "epoch": 2.86, "grad_norm": 9.233060836791992, "learning_rate": 1.2326520369390414e-08, "loss": 0.3302, "step": 19683 }, { "epoch": 2.86, "grad_norm": 8.869608879089355, "learning_rate": 1.2301757044153927e-08, "loss": 0.3943, "step": 19684 }, { "epoch": 2.86, "grad_norm": 9.849708557128906, "learning_rate": 1.2277018464092014e-08, "loss": 0.3709, "step": 19685 }, { "epoch": 2.86, "grad_norm": 8.8680419921875, "learning_rate": 1.2252304629824628e-08, "loss": 0.3141, "step": 19686 }, { "epoch": 2.86, "grad_norm": 8.564929008483887, "learning_rate": 1.2227615541970604e-08, "loss": 0.2917, "step": 19687 }, { "epoch": 2.86, "grad_norm": 8.359261512756348, "learning_rate": 1.2202951201149003e-08, "loss": 0.3247, "step": 19688 }, { "epoch": 2.86, "grad_norm": 8.107728004455566, "learning_rate": 1.2178311607977331e-08, "loss": 0.3287, "step": 19689 }, { "epoch": 2.86, "grad_norm": 7.697904586791992, "learning_rate": 1.2153696763072985e-08, "loss": 0.3284, "step": 19690 }, { "epoch": 2.86, "grad_norm": 8.817927360534668, "learning_rate": 1.2129106667052691e-08, "loss": 0.3476, "step": 19691 }, { "epoch": 2.86, "grad_norm": 7.944738388061523, "learning_rate": 1.2104541320532513e-08, "loss": 0.3356, "step": 19692 }, { "epoch": 2.86, "grad_norm": 9.300819396972656, "learning_rate": 1.2080000724127958e-08, "loss": 0.3376, "step": 19693 }, { "epoch": 2.86, "grad_norm": 7.845280170440674, "learning_rate": 1.2055484878453648e-08, "loss": 0.3124, "step": 19694 }, { "epoch": 2.86, "grad_norm": 7.913603782653809, "learning_rate": 1.2030993784124088e-08, "loss": 0.3327, "step": 19695 }, { "epoch": 2.86, "grad_norm": 8.27088737487793, "learning_rate": 1.2006527441752568e-08, "loss": 0.301, "step": 19696 }, { "epoch": 2.86, "grad_norm": 8.866392135620117, "learning_rate": 1.1982085851952261e-08, "loss": 0.3698, "step": 19697 }, { "epoch": 2.86, "grad_norm": 8.28747844696045, "learning_rate": 1.195766901533546e-08, "loss": 0.3384, "step": 19698 }, { "epoch": 2.86, "grad_norm": 8.277054786682129, "learning_rate": 1.193327693251367e-08, "loss": 0.3194, "step": 19699 }, { "epoch": 2.86, "grad_norm": 7.802028656005859, "learning_rate": 1.1908909604098406e-08, "loss": 0.33, "step": 19700 }, { "epoch": 2.86, "grad_norm": 9.210144996643066, "learning_rate": 1.1884567030699843e-08, "loss": 0.3587, "step": 19701 }, { "epoch": 2.86, "grad_norm": 8.177510261535645, "learning_rate": 1.1860249212927941e-08, "loss": 0.312, "step": 19702 }, { "epoch": 2.86, "grad_norm": 8.80842399597168, "learning_rate": 1.183595615139199e-08, "loss": 0.3303, "step": 19703 }, { "epoch": 2.86, "grad_norm": 7.8537092208862305, "learning_rate": 1.1811687846700501e-08, "loss": 0.3404, "step": 19704 }, { "epoch": 2.86, "grad_norm": 8.460806846618652, "learning_rate": 1.1787444299461435e-08, "loss": 0.3509, "step": 19705 }, { "epoch": 2.86, "grad_norm": 8.521724700927734, "learning_rate": 1.1763225510282416e-08, "loss": 0.3649, "step": 19706 }, { "epoch": 2.86, "grad_norm": 8.419637680053711, "learning_rate": 1.1739031479770067e-08, "loss": 0.3298, "step": 19707 }, { "epoch": 2.86, "grad_norm": 8.136680603027344, "learning_rate": 1.1714862208530462e-08, "loss": 0.3269, "step": 19708 }, { "epoch": 2.86, "grad_norm": 10.08447265625, "learning_rate": 1.1690717697169227e-08, "loss": 0.3682, "step": 19709 }, { "epoch": 2.86, "grad_norm": 7.929592609405518, "learning_rate": 1.1666597946291213e-08, "loss": 0.3577, "step": 19710 }, { "epoch": 2.86, "grad_norm": 8.397598266601562, "learning_rate": 1.164250295650071e-08, "loss": 0.3247, "step": 19711 }, { "epoch": 2.86, "grad_norm": 8.510725021362305, "learning_rate": 1.1618432728401351e-08, "loss": 0.3488, "step": 19712 }, { "epoch": 2.86, "grad_norm": 8.741045951843262, "learning_rate": 1.1594387262596318e-08, "loss": 0.3149, "step": 19713 }, { "epoch": 2.86, "grad_norm": 8.567885398864746, "learning_rate": 1.1570366559687794e-08, "loss": 0.384, "step": 19714 }, { "epoch": 2.86, "grad_norm": 8.76605224609375, "learning_rate": 1.1546370620277746e-08, "loss": 0.2957, "step": 19715 }, { "epoch": 2.86, "grad_norm": 7.1061692237854, "learning_rate": 1.1522399444967357e-08, "loss": 0.3334, "step": 19716 }, { "epoch": 2.86, "grad_norm": 8.138162612915039, "learning_rate": 1.1498453034357147e-08, "loss": 0.3814, "step": 19717 }, { "epoch": 2.86, "grad_norm": 9.13493537902832, "learning_rate": 1.147453138904697e-08, "loss": 0.4043, "step": 19718 }, { "epoch": 2.86, "grad_norm": 10.56307601928711, "learning_rate": 1.1450634509636348e-08, "loss": 0.3656, "step": 19719 }, { "epoch": 2.86, "grad_norm": 7.803188323974609, "learning_rate": 1.14267623967238e-08, "loss": 0.3262, "step": 19720 }, { "epoch": 2.86, "grad_norm": 9.80074405670166, "learning_rate": 1.1402915050907402e-08, "loss": 0.3202, "step": 19721 }, { "epoch": 2.86, "grad_norm": 8.771625518798828, "learning_rate": 1.137909247278468e-08, "loss": 0.3779, "step": 19722 }, { "epoch": 2.86, "grad_norm": 9.647923469543457, "learning_rate": 1.1355294662952596e-08, "loss": 0.3374, "step": 19723 }, { "epoch": 2.86, "grad_norm": 7.699787616729736, "learning_rate": 1.1331521622007124e-08, "loss": 0.3026, "step": 19724 }, { "epoch": 2.86, "grad_norm": 8.341378211975098, "learning_rate": 1.1307773350543892e-08, "loss": 0.3173, "step": 19725 }, { "epoch": 2.86, "grad_norm": 8.671902656555176, "learning_rate": 1.1284049849157984e-08, "loss": 0.328, "step": 19726 }, { "epoch": 2.86, "grad_norm": 8.178104400634766, "learning_rate": 1.1260351118443701e-08, "loss": 0.3381, "step": 19727 }, { "epoch": 2.86, "grad_norm": 9.133391380310059, "learning_rate": 1.12366771589949e-08, "loss": 0.3696, "step": 19728 }, { "epoch": 2.86, "grad_norm": 8.48572826385498, "learning_rate": 1.1213027971404442e-08, "loss": 0.3206, "step": 19729 }, { "epoch": 2.86, "grad_norm": 8.55135440826416, "learning_rate": 1.1189403556264964e-08, "loss": 0.3089, "step": 19730 }, { "epoch": 2.86, "grad_norm": 8.865574836730957, "learning_rate": 1.1165803914168437e-08, "loss": 0.3082, "step": 19731 }, { "epoch": 2.86, "grad_norm": 9.016947746276855, "learning_rate": 1.1142229045705942e-08, "loss": 0.3657, "step": 19732 }, { "epoch": 2.86, "grad_norm": 8.681565284729004, "learning_rate": 1.111867895146823e-08, "loss": 0.331, "step": 19733 }, { "epoch": 2.86, "grad_norm": 10.437596321105957, "learning_rate": 1.1095153632045163e-08, "loss": 0.398, "step": 19734 }, { "epoch": 2.86, "grad_norm": 9.752408027648926, "learning_rate": 1.1071653088026268e-08, "loss": 0.3543, "step": 19735 }, { "epoch": 2.86, "grad_norm": 8.947750091552734, "learning_rate": 1.1048177320000184e-08, "loss": 0.3957, "step": 19736 }, { "epoch": 2.86, "grad_norm": 10.254494667053223, "learning_rate": 1.1024726328555223e-08, "loss": 0.4242, "step": 19737 }, { "epoch": 2.86, "grad_norm": 7.77790641784668, "learning_rate": 1.1001300114278911e-08, "loss": 0.3377, "step": 19738 }, { "epoch": 2.86, "grad_norm": 10.114913940429688, "learning_rate": 1.097789867775789e-08, "loss": 0.3356, "step": 19739 }, { "epoch": 2.86, "grad_norm": 8.232791900634766, "learning_rate": 1.0954522019578805e-08, "loss": 0.3342, "step": 19740 }, { "epoch": 2.86, "grad_norm": 10.295296669006348, "learning_rate": 1.0931170140326962e-08, "loss": 0.4235, "step": 19741 }, { "epoch": 2.86, "grad_norm": 10.551021575927734, "learning_rate": 1.0907843040587783e-08, "loss": 0.4276, "step": 19742 }, { "epoch": 2.86, "grad_norm": 8.484967231750488, "learning_rate": 1.0884540720945246e-08, "loss": 0.3031, "step": 19743 }, { "epoch": 2.86, "grad_norm": 7.743077278137207, "learning_rate": 1.086126318198366e-08, "loss": 0.3291, "step": 19744 }, { "epoch": 2.86, "grad_norm": 9.694648742675781, "learning_rate": 1.0838010424285782e-08, "loss": 0.3714, "step": 19745 }, { "epoch": 2.87, "grad_norm": 8.619375228881836, "learning_rate": 1.0814782448434368e-08, "loss": 0.4015, "step": 19746 }, { "epoch": 2.87, "grad_norm": 8.996919631958008, "learning_rate": 1.0791579255011284e-08, "loss": 0.3618, "step": 19747 }, { "epoch": 2.87, "grad_norm": 8.36635971069336, "learning_rate": 1.0768400844597958e-08, "loss": 0.3457, "step": 19748 }, { "epoch": 2.87, "grad_norm": 8.587037086486816, "learning_rate": 1.0745247217774923e-08, "loss": 0.3104, "step": 19749 }, { "epoch": 2.87, "grad_norm": 8.822355270385742, "learning_rate": 1.0722118375122491e-08, "loss": 0.3234, "step": 19750 }, { "epoch": 2.87, "grad_norm": 7.748312950134277, "learning_rate": 1.0699014317219867e-08, "loss": 0.3699, "step": 19751 }, { "epoch": 2.87, "grad_norm": 7.828462600708008, "learning_rate": 1.0675935044646035e-08, "loss": 0.3296, "step": 19752 }, { "epoch": 2.87, "grad_norm": 9.181719779968262, "learning_rate": 1.0652880557979082e-08, "loss": 0.401, "step": 19753 }, { "epoch": 2.87, "grad_norm": 8.706982612609863, "learning_rate": 1.0629850857796885e-08, "loss": 0.3645, "step": 19754 }, { "epoch": 2.87, "grad_norm": 7.595800399780273, "learning_rate": 1.060684594467598e-08, "loss": 0.3271, "step": 19755 }, { "epoch": 2.87, "grad_norm": 10.44835376739502, "learning_rate": 1.0583865819193127e-08, "loss": 0.3589, "step": 19756 }, { "epoch": 2.87, "grad_norm": 9.451088905334473, "learning_rate": 1.0560910481923757e-08, "loss": 0.3798, "step": 19757 }, { "epoch": 2.87, "grad_norm": 8.60804271697998, "learning_rate": 1.0537979933443187e-08, "loss": 0.3242, "step": 19758 }, { "epoch": 2.87, "grad_norm": 9.188627243041992, "learning_rate": 1.0515074174325845e-08, "loss": 0.4089, "step": 19759 }, { "epoch": 2.87, "grad_norm": 9.72882080078125, "learning_rate": 1.0492193205145494e-08, "loss": 0.3805, "step": 19760 }, { "epoch": 2.87, "grad_norm": 8.397058486938477, "learning_rate": 1.0469337026475566e-08, "loss": 0.3415, "step": 19761 }, { "epoch": 2.87, "grad_norm": 8.615753173828125, "learning_rate": 1.0446505638888382e-08, "loss": 0.4184, "step": 19762 }, { "epoch": 2.87, "grad_norm": 7.883391380310059, "learning_rate": 1.0423699042956369e-08, "loss": 0.3146, "step": 19763 }, { "epoch": 2.87, "grad_norm": 9.867993354797363, "learning_rate": 1.0400917239250518e-08, "loss": 0.3374, "step": 19764 }, { "epoch": 2.87, "grad_norm": 8.414444923400879, "learning_rate": 1.0378160228341926e-08, "loss": 0.3858, "step": 19765 }, { "epoch": 2.87, "grad_norm": 10.623294830322266, "learning_rate": 1.035542801080047e-08, "loss": 0.357, "step": 19766 }, { "epoch": 2.87, "grad_norm": 9.21965217590332, "learning_rate": 1.0332720587195809e-08, "loss": 0.3545, "step": 19767 }, { "epoch": 2.87, "grad_norm": 9.685429573059082, "learning_rate": 1.0310037958096706e-08, "loss": 0.3702, "step": 19768 }, { "epoch": 2.87, "grad_norm": 9.146484375, "learning_rate": 1.0287380124071598e-08, "loss": 0.3913, "step": 19769 }, { "epoch": 2.87, "grad_norm": 8.250457763671875, "learning_rate": 1.026474708568803e-08, "loss": 0.3323, "step": 19770 }, { "epoch": 2.87, "grad_norm": 7.667627334594727, "learning_rate": 1.0242138843512994e-08, "loss": 0.3402, "step": 19771 }, { "epoch": 2.87, "grad_norm": 9.23197078704834, "learning_rate": 1.0219555398113145e-08, "loss": 0.381, "step": 19772 }, { "epoch": 2.87, "grad_norm": 8.060892105102539, "learning_rate": 1.0196996750054031e-08, "loss": 0.3496, "step": 19773 }, { "epoch": 2.87, "grad_norm": 8.087538719177246, "learning_rate": 1.017446289990087e-08, "loss": 0.2574, "step": 19774 }, { "epoch": 2.87, "grad_norm": 9.206782341003418, "learning_rate": 1.0151953848218319e-08, "loss": 0.4095, "step": 19775 }, { "epoch": 2.87, "grad_norm": 7.531438827514648, "learning_rate": 1.012946959557015e-08, "loss": 0.3592, "step": 19776 }, { "epoch": 2.87, "grad_norm": 8.594879150390625, "learning_rate": 1.01070101425198e-08, "loss": 0.3142, "step": 19777 }, { "epoch": 2.87, "grad_norm": 8.888713836669922, "learning_rate": 1.0084575489629821e-08, "loss": 0.3276, "step": 19778 }, { "epoch": 2.87, "grad_norm": 9.973753929138184, "learning_rate": 1.006216563746254e-08, "loss": 0.3935, "step": 19779 }, { "epoch": 2.87, "grad_norm": 9.536447525024414, "learning_rate": 1.0039780586579061e-08, "loss": 0.364, "step": 19780 }, { "epoch": 2.87, "grad_norm": 9.596410751342773, "learning_rate": 1.0017420337540495e-08, "loss": 0.4053, "step": 19781 }, { "epoch": 2.87, "grad_norm": 9.25525951385498, "learning_rate": 9.995084890906724e-09, "loss": 0.3566, "step": 19782 }, { "epoch": 2.87, "grad_norm": 9.032898902893066, "learning_rate": 9.972774247237636e-09, "loss": 0.3096, "step": 19783 }, { "epoch": 2.87, "grad_norm": 9.144721031188965, "learning_rate": 9.950488407092117e-09, "loss": 0.3644, "step": 19784 }, { "epoch": 2.87, "grad_norm": 8.171449661254883, "learning_rate": 9.928227371028275e-09, "loss": 0.3682, "step": 19785 }, { "epoch": 2.87, "grad_norm": 8.049530029296875, "learning_rate": 9.905991139604109e-09, "loss": 0.3817, "step": 19786 }, { "epoch": 2.87, "grad_norm": 9.470355987548828, "learning_rate": 9.883779713376616e-09, "loss": 0.3222, "step": 19787 }, { "epoch": 2.87, "grad_norm": 9.730791091918945, "learning_rate": 9.861593092902243e-09, "loss": 0.3774, "step": 19788 }, { "epoch": 2.87, "grad_norm": 8.345190048217773, "learning_rate": 9.839431278736875e-09, "loss": 0.3653, "step": 19789 }, { "epoch": 2.87, "grad_norm": 8.924525260925293, "learning_rate": 9.817294271435628e-09, "loss": 0.3275, "step": 19790 }, { "epoch": 2.87, "grad_norm": 8.035770416259766, "learning_rate": 9.795182071553276e-09, "loss": 0.3487, "step": 19791 }, { "epoch": 2.87, "grad_norm": 11.136147499084473, "learning_rate": 9.773094679643601e-09, "loss": 0.4348, "step": 19792 }, { "epoch": 2.87, "grad_norm": 8.676873207092285, "learning_rate": 9.75103209626027e-09, "loss": 0.3074, "step": 19793 }, { "epoch": 2.87, "grad_norm": 9.19143009185791, "learning_rate": 9.72899432195573e-09, "loss": 0.3806, "step": 19794 }, { "epoch": 2.87, "grad_norm": 9.361791610717773, "learning_rate": 9.706981357282207e-09, "loss": 0.3821, "step": 19795 }, { "epoch": 2.87, "grad_norm": 8.244582176208496, "learning_rate": 9.684993202791258e-09, "loss": 0.288, "step": 19796 }, { "epoch": 2.87, "grad_norm": 8.23279094696045, "learning_rate": 9.663029859033667e-09, "loss": 0.3386, "step": 19797 }, { "epoch": 2.87, "grad_norm": 9.570161819458008, "learning_rate": 9.64109132655988e-09, "loss": 0.3429, "step": 19798 }, { "epoch": 2.87, "grad_norm": 9.2044677734375, "learning_rate": 9.619177605919126e-09, "loss": 0.4163, "step": 19799 }, { "epoch": 2.87, "grad_norm": 9.414355278015137, "learning_rate": 9.597288697660966e-09, "loss": 0.3635, "step": 19800 }, { "epoch": 2.87, "grad_norm": 8.805850982666016, "learning_rate": 9.575424602333292e-09, "loss": 0.3727, "step": 19801 }, { "epoch": 2.87, "grad_norm": 8.41767406463623, "learning_rate": 9.553585320484115e-09, "loss": 0.3337, "step": 19802 }, { "epoch": 2.87, "grad_norm": 9.467671394348145, "learning_rate": 9.531770852660659e-09, "loss": 0.3814, "step": 19803 }, { "epoch": 2.87, "grad_norm": 8.35516357421875, "learning_rate": 9.509981199409267e-09, "loss": 0.3169, "step": 19804 }, { "epoch": 2.87, "grad_norm": 9.438541412353516, "learning_rate": 9.488216361275836e-09, "loss": 0.3839, "step": 19805 }, { "epoch": 2.87, "grad_norm": 8.822521209716797, "learning_rate": 9.466476338805818e-09, "loss": 0.3334, "step": 19806 }, { "epoch": 2.87, "grad_norm": 8.679253578186035, "learning_rate": 9.444761132543776e-09, "loss": 0.3524, "step": 19807 }, { "epoch": 2.87, "grad_norm": 9.078418731689453, "learning_rate": 9.423070743033834e-09, "loss": 0.3838, "step": 19808 }, { "epoch": 2.87, "grad_norm": 7.685924530029297, "learning_rate": 9.401405170819221e-09, "loss": 0.3293, "step": 19809 }, { "epoch": 2.87, "grad_norm": 8.504890441894531, "learning_rate": 9.379764416442947e-09, "loss": 0.3519, "step": 19810 }, { "epoch": 2.87, "grad_norm": 8.706205368041992, "learning_rate": 9.358148480447026e-09, "loss": 0.3313, "step": 19811 }, { "epoch": 2.87, "grad_norm": 7.898960590362549, "learning_rate": 9.336557363373131e-09, "loss": 0.3403, "step": 19812 }, { "epoch": 2.87, "grad_norm": 8.653070449829102, "learning_rate": 9.314991065762056e-09, "loss": 0.3791, "step": 19813 }, { "epoch": 2.87, "grad_norm": 8.182449340820312, "learning_rate": 9.293449588154256e-09, "loss": 0.362, "step": 19814 }, { "epoch": 2.88, "grad_norm": 8.73695182800293, "learning_rate": 9.271932931089299e-09, "loss": 0.3192, "step": 19815 }, { "epoch": 2.88, "grad_norm": 9.478721618652344, "learning_rate": 9.250441095106198e-09, "loss": 0.3128, "step": 19816 }, { "epoch": 2.88, "grad_norm": 7.117577075958252, "learning_rate": 9.228974080743746e-09, "loss": 0.3437, "step": 19817 }, { "epoch": 2.88, "grad_norm": 8.793280601501465, "learning_rate": 9.207531888539289e-09, "loss": 0.3449, "step": 19818 }, { "epoch": 2.88, "grad_norm": 8.731740951538086, "learning_rate": 9.186114519030397e-09, "loss": 0.3358, "step": 19819 }, { "epoch": 2.88, "grad_norm": 9.418249130249023, "learning_rate": 9.164721972753308e-09, "loss": 0.2997, "step": 19820 }, { "epoch": 2.88, "grad_norm": 7.969393730163574, "learning_rate": 9.143354250244373e-09, "loss": 0.3623, "step": 19821 }, { "epoch": 2.88, "grad_norm": 9.080314636230469, "learning_rate": 9.122011352038717e-09, "loss": 0.3557, "step": 19822 }, { "epoch": 2.88, "grad_norm": 8.294767379760742, "learning_rate": 9.100693278671024e-09, "loss": 0.3406, "step": 19823 }, { "epoch": 2.88, "grad_norm": 8.190681457519531, "learning_rate": 9.07940003067531e-09, "loss": 0.3516, "step": 19824 }, { "epoch": 2.88, "grad_norm": 9.178001403808594, "learning_rate": 9.058131608585263e-09, "loss": 0.3475, "step": 19825 }, { "epoch": 2.88, "grad_norm": 8.24006462097168, "learning_rate": 9.036888012933564e-09, "loss": 0.3069, "step": 19826 }, { "epoch": 2.88, "grad_norm": 7.556852340698242, "learning_rate": 9.015669244252566e-09, "loss": 0.3138, "step": 19827 }, { "epoch": 2.88, "grad_norm": 7.778314113616943, "learning_rate": 8.994475303073734e-09, "loss": 0.2578, "step": 19828 }, { "epoch": 2.88, "grad_norm": 9.826549530029297, "learning_rate": 8.973306189928198e-09, "loss": 0.3965, "step": 19829 }, { "epoch": 2.88, "grad_norm": 8.106266021728516, "learning_rate": 8.95216190534609e-09, "loss": 0.3168, "step": 19830 }, { "epoch": 2.88, "grad_norm": 9.683708190917969, "learning_rate": 8.931042449857318e-09, "loss": 0.4377, "step": 19831 }, { "epoch": 2.88, "grad_norm": 8.852725982666016, "learning_rate": 8.909947823991015e-09, "loss": 0.3342, "step": 19832 }, { "epoch": 2.88, "grad_norm": 10.163613319396973, "learning_rate": 8.888878028275537e-09, "loss": 0.3939, "step": 19833 }, { "epoch": 2.88, "grad_norm": 8.261862754821777, "learning_rate": 8.867833063238906e-09, "loss": 0.367, "step": 19834 }, { "epoch": 2.88, "grad_norm": 8.353055953979492, "learning_rate": 8.846812929408254e-09, "loss": 0.397, "step": 19835 }, { "epoch": 2.88, "grad_norm": 8.92757511138916, "learning_rate": 8.825817627310272e-09, "loss": 0.3626, "step": 19836 }, { "epoch": 2.88, "grad_norm": 9.414368629455566, "learning_rate": 8.804847157470985e-09, "loss": 0.407, "step": 19837 }, { "epoch": 2.88, "grad_norm": 10.449591636657715, "learning_rate": 8.783901520415638e-09, "loss": 0.4625, "step": 19838 }, { "epoch": 2.88, "grad_norm": 9.475525856018066, "learning_rate": 8.762980716669144e-09, "loss": 0.3317, "step": 19839 }, { "epoch": 2.88, "grad_norm": 8.649835586547852, "learning_rate": 8.742084746755529e-09, "loss": 0.3964, "step": 19840 }, { "epoch": 2.88, "grad_norm": 9.12120246887207, "learning_rate": 8.721213611198376e-09, "loss": 0.435, "step": 19841 }, { "epoch": 2.88, "grad_norm": 9.715078353881836, "learning_rate": 8.700367310520596e-09, "loss": 0.3785, "step": 19842 }, { "epoch": 2.88, "grad_norm": 8.014384269714355, "learning_rate": 8.67954584524444e-09, "loss": 0.3626, "step": 19843 }, { "epoch": 2.88, "grad_norm": 8.521674156188965, "learning_rate": 8.658749215891492e-09, "loss": 0.3193, "step": 19844 }, { "epoch": 2.88, "grad_norm": 7.6586737632751465, "learning_rate": 8.637977422982779e-09, "loss": 0.3197, "step": 19845 }, { "epoch": 2.88, "grad_norm": 8.302583694458008, "learning_rate": 8.617230467038773e-09, "loss": 0.3623, "step": 19846 }, { "epoch": 2.88, "grad_norm": 11.182988166809082, "learning_rate": 8.59650834857928e-09, "loss": 0.4304, "step": 19847 }, { "epoch": 2.88, "grad_norm": 9.37667465209961, "learning_rate": 8.575811068123329e-09, "loss": 0.3811, "step": 19848 }, { "epoch": 2.88, "grad_norm": 8.18403434753418, "learning_rate": 8.555138626189618e-09, "loss": 0.3176, "step": 19849 }, { "epoch": 2.88, "grad_norm": 7.916362762451172, "learning_rate": 8.534491023295954e-09, "loss": 0.3151, "step": 19850 }, { "epoch": 2.88, "grad_norm": 8.693169593811035, "learning_rate": 8.513868259959588e-09, "loss": 0.2933, "step": 19851 }, { "epoch": 2.88, "grad_norm": 7.789682865142822, "learning_rate": 8.493270336697334e-09, "loss": 0.331, "step": 19852 }, { "epoch": 2.88, "grad_norm": 9.312214851379395, "learning_rate": 8.472697254025107e-09, "loss": 0.3494, "step": 19853 }, { "epoch": 2.88, "grad_norm": 9.860739707946777, "learning_rate": 8.452149012458498e-09, "loss": 0.425, "step": 19854 }, { "epoch": 2.88, "grad_norm": 10.672203063964844, "learning_rate": 8.431625612512095e-09, "loss": 0.4295, "step": 19855 }, { "epoch": 2.88, "grad_norm": 9.2827787399292, "learning_rate": 8.411127054700262e-09, "loss": 0.3938, "step": 19856 }, { "epoch": 2.88, "grad_norm": 9.03451156616211, "learning_rate": 8.390653339536369e-09, "loss": 0.334, "step": 19857 }, { "epoch": 2.88, "grad_norm": 9.306791305541992, "learning_rate": 8.370204467533559e-09, "loss": 0.4213, "step": 19858 }, { "epoch": 2.88, "grad_norm": 9.330031394958496, "learning_rate": 8.34978043920398e-09, "loss": 0.3746, "step": 19859 }, { "epoch": 2.88, "grad_norm": 9.644021034240723, "learning_rate": 8.329381255059442e-09, "loss": 0.3938, "step": 19860 }, { "epoch": 2.88, "grad_norm": 8.974752426147461, "learning_rate": 8.309006915610983e-09, "loss": 0.3622, "step": 19861 }, { "epoch": 2.88, "grad_norm": 8.760119438171387, "learning_rate": 8.288657421368973e-09, "loss": 0.3588, "step": 19862 }, { "epoch": 2.88, "grad_norm": 7.94216775894165, "learning_rate": 8.268332772843222e-09, "loss": 0.3289, "step": 19863 }, { "epoch": 2.88, "grad_norm": 9.248822212219238, "learning_rate": 8.248032970543107e-09, "loss": 0.3838, "step": 19864 }, { "epoch": 2.88, "grad_norm": 8.855218887329102, "learning_rate": 8.227758014977104e-09, "loss": 0.331, "step": 19865 }, { "epoch": 2.88, "grad_norm": 8.22131633758545, "learning_rate": 8.207507906653144e-09, "loss": 0.3592, "step": 19866 }, { "epoch": 2.88, "grad_norm": 9.555746078491211, "learning_rate": 8.187282646078486e-09, "loss": 0.3698, "step": 19867 }, { "epoch": 2.88, "grad_norm": 8.522954940795898, "learning_rate": 8.16708223376006e-09, "loss": 0.3147, "step": 19868 }, { "epoch": 2.88, "grad_norm": 8.955160140991211, "learning_rate": 8.14690667020379e-09, "loss": 0.3949, "step": 19869 }, { "epoch": 2.88, "grad_norm": 8.325077056884766, "learning_rate": 8.126755955915165e-09, "loss": 0.3935, "step": 19870 }, { "epoch": 2.88, "grad_norm": 9.102813720703125, "learning_rate": 8.106630091399114e-09, "loss": 0.3825, "step": 19871 }, { "epoch": 2.88, "grad_norm": 8.687568664550781, "learning_rate": 8.086529077159565e-09, "loss": 0.3624, "step": 19872 }, { "epoch": 2.88, "grad_norm": 8.473395347595215, "learning_rate": 8.06645291370056e-09, "loss": 0.3229, "step": 19873 }, { "epoch": 2.88, "grad_norm": 8.515007972717285, "learning_rate": 8.04640160152481e-09, "loss": 0.3141, "step": 19874 }, { "epoch": 2.88, "grad_norm": 8.54608154296875, "learning_rate": 8.02637514113469e-09, "loss": 0.3135, "step": 19875 }, { "epoch": 2.88, "grad_norm": 9.859235763549805, "learning_rate": 8.006373533032017e-09, "loss": 0.4238, "step": 19876 }, { "epoch": 2.88, "grad_norm": 8.556303024291992, "learning_rate": 7.98639677771773e-09, "loss": 0.3601, "step": 19877 }, { "epoch": 2.88, "grad_norm": 9.882295608520508, "learning_rate": 7.966444875692535e-09, "loss": 0.3425, "step": 19878 }, { "epoch": 2.88, "grad_norm": 9.167625427246094, "learning_rate": 7.946517827456034e-09, "loss": 0.3769, "step": 19879 }, { "epoch": 2.88, "grad_norm": 8.379454612731934, "learning_rate": 7.926615633507715e-09, "loss": 0.2698, "step": 19880 }, { "epoch": 2.88, "grad_norm": 9.992100715637207, "learning_rate": 7.906738294346072e-09, "loss": 0.4066, "step": 19881 }, { "epoch": 2.88, "grad_norm": 8.978821754455566, "learning_rate": 7.886885810469147e-09, "loss": 0.3698, "step": 19882 }, { "epoch": 2.88, "grad_norm": 8.356844902038574, "learning_rate": 7.86705818237421e-09, "loss": 0.4212, "step": 19883 }, { "epoch": 2.89, "grad_norm": 7.752682685852051, "learning_rate": 7.847255410558196e-09, "loss": 0.2983, "step": 19884 }, { "epoch": 2.89, "grad_norm": 7.829905986785889, "learning_rate": 7.827477495517043e-09, "loss": 0.331, "step": 19885 }, { "epoch": 2.89, "grad_norm": 10.040504455566406, "learning_rate": 7.807724437746244e-09, "loss": 0.4708, "step": 19886 }, { "epoch": 2.89, "grad_norm": 9.051514625549316, "learning_rate": 7.787996237740956e-09, "loss": 0.3345, "step": 19887 }, { "epoch": 2.89, "grad_norm": 8.038912773132324, "learning_rate": 7.76829289599512e-09, "loss": 0.3491, "step": 19888 }, { "epoch": 2.89, "grad_norm": 7.097222805023193, "learning_rate": 7.748614413002563e-09, "loss": 0.2994, "step": 19889 }, { "epoch": 2.89, "grad_norm": 7.161569118499756, "learning_rate": 7.728960789256112e-09, "loss": 0.3119, "step": 19890 }, { "epoch": 2.89, "grad_norm": 8.455022811889648, "learning_rate": 7.709332025248484e-09, "loss": 0.2751, "step": 19891 }, { "epoch": 2.89, "grad_norm": 8.157697677612305, "learning_rate": 7.689728121471062e-09, "loss": 0.3747, "step": 19892 }, { "epoch": 2.89, "grad_norm": 7.65901517868042, "learning_rate": 7.670149078415233e-09, "loss": 0.3405, "step": 19893 }, { "epoch": 2.89, "grad_norm": 7.5076446533203125, "learning_rate": 7.650594896571494e-09, "loss": 0.3306, "step": 19894 }, { "epoch": 2.89, "grad_norm": 8.46793270111084, "learning_rate": 7.631065576429563e-09, "loss": 0.3436, "step": 19895 }, { "epoch": 2.89, "grad_norm": 8.667125701904297, "learning_rate": 7.611561118478938e-09, "loss": 0.3636, "step": 19896 }, { "epoch": 2.89, "grad_norm": 8.630230903625488, "learning_rate": 7.592081523208227e-09, "loss": 0.3571, "step": 19897 }, { "epoch": 2.89, "grad_norm": 8.736540794372559, "learning_rate": 7.572626791105374e-09, "loss": 0.3195, "step": 19898 }, { "epoch": 2.89, "grad_norm": 9.507211685180664, "learning_rate": 7.553196922657878e-09, "loss": 0.3595, "step": 19899 }, { "epoch": 2.89, "grad_norm": 8.547283172607422, "learning_rate": 7.53379191835246e-09, "loss": 0.3466, "step": 19900 }, { "epoch": 2.89, "grad_norm": 8.734315872192383, "learning_rate": 7.514411778675179e-09, "loss": 0.3688, "step": 19901 }, { "epoch": 2.89, "grad_norm": 7.601528167724609, "learning_rate": 7.495056504111864e-09, "loss": 0.2782, "step": 19902 }, { "epoch": 2.89, "grad_norm": 8.217657089233398, "learning_rate": 7.475726095147127e-09, "loss": 0.3418, "step": 19903 }, { "epoch": 2.89, "grad_norm": 9.513028144836426, "learning_rate": 7.456420552265474e-09, "loss": 0.3423, "step": 19904 }, { "epoch": 2.89, "grad_norm": 7.79100227355957, "learning_rate": 7.437139875950516e-09, "loss": 0.327, "step": 19905 }, { "epoch": 2.89, "grad_norm": 8.161787986755371, "learning_rate": 7.417884066685198e-09, "loss": 0.3347, "step": 19906 }, { "epoch": 2.89, "grad_norm": 8.820341110229492, "learning_rate": 7.398653124952026e-09, "loss": 0.355, "step": 19907 }, { "epoch": 2.89, "grad_norm": 8.322555541992188, "learning_rate": 7.379447051232835e-09, "loss": 0.3461, "step": 19908 }, { "epoch": 2.89, "grad_norm": 8.831561088562012, "learning_rate": 7.3602658460086846e-09, "loss": 0.4007, "step": 19909 }, { "epoch": 2.89, "grad_norm": 11.559843063354492, "learning_rate": 7.3411095097603014e-09, "loss": 0.442, "step": 19910 }, { "epoch": 2.89, "grad_norm": 7.978484153747559, "learning_rate": 7.321978042967414e-09, "loss": 0.3934, "step": 19911 }, { "epoch": 2.89, "grad_norm": 8.3094482421875, "learning_rate": 7.302871446109526e-09, "loss": 0.3886, "step": 19912 }, { "epoch": 2.89, "grad_norm": 8.690425872802734, "learning_rate": 7.283789719665256e-09, "loss": 0.3639, "step": 19913 }, { "epoch": 2.89, "grad_norm": 7.366077423095703, "learning_rate": 7.264732864112555e-09, "loss": 0.2938, "step": 19914 }, { "epoch": 2.89, "grad_norm": 6.783942222595215, "learning_rate": 7.245700879928929e-09, "loss": 0.3109, "step": 19915 }, { "epoch": 2.89, "grad_norm": 8.318929672241211, "learning_rate": 7.226693767591219e-09, "loss": 0.3053, "step": 19916 }, { "epoch": 2.89, "grad_norm": 9.764542579650879, "learning_rate": 7.207711527575711e-09, "loss": 0.3484, "step": 19917 }, { "epoch": 2.89, "grad_norm": 7.948555946350098, "learning_rate": 7.188754160357802e-09, "loss": 0.3239, "step": 19918 }, { "epoch": 2.89, "grad_norm": 11.156346321105957, "learning_rate": 7.1698216664124455e-09, "loss": 0.4397, "step": 19919 }, { "epoch": 2.89, "grad_norm": 8.25571346282959, "learning_rate": 7.150914046214151e-09, "loss": 0.3385, "step": 19920 }, { "epoch": 2.89, "grad_norm": 9.251608848571777, "learning_rate": 7.1320313002364296e-09, "loss": 0.335, "step": 19921 }, { "epoch": 2.89, "grad_norm": 9.107950210571289, "learning_rate": 7.113173428952457e-09, "loss": 0.3936, "step": 19922 }, { "epoch": 2.89, "grad_norm": 8.929891586303711, "learning_rate": 7.094340432834633e-09, "loss": 0.3199, "step": 19923 }, { "epoch": 2.89, "grad_norm": 8.063714981079102, "learning_rate": 7.075532312354804e-09, "loss": 0.378, "step": 19924 }, { "epoch": 2.89, "grad_norm": 8.989862442016602, "learning_rate": 7.056749067984147e-09, "loss": 0.3473, "step": 19925 }, { "epoch": 2.89, "grad_norm": 9.630715370178223, "learning_rate": 7.037990700193286e-09, "loss": 0.3485, "step": 19926 }, { "epoch": 2.89, "grad_norm": 8.47775650024414, "learning_rate": 7.01925720945229e-09, "loss": 0.305, "step": 19927 }, { "epoch": 2.89, "grad_norm": 9.128499984741211, "learning_rate": 7.000548596230227e-09, "loss": 0.3387, "step": 19928 }, { "epoch": 2.89, "grad_norm": 7.571318626403809, "learning_rate": 6.981864860996056e-09, "loss": 0.2874, "step": 19929 }, { "epoch": 2.89, "grad_norm": 9.679981231689453, "learning_rate": 6.963206004217736e-09, "loss": 0.3853, "step": 19930 }, { "epoch": 2.89, "grad_norm": 9.140405654907227, "learning_rate": 6.9445720263627825e-09, "loss": 0.418, "step": 19931 }, { "epoch": 2.89, "grad_norm": 8.958308219909668, "learning_rate": 6.9259629278980434e-09, "loss": 0.3576, "step": 19932 }, { "epoch": 2.89, "grad_norm": 8.176609992980957, "learning_rate": 6.907378709289702e-09, "loss": 0.3186, "step": 19933 }, { "epoch": 2.89, "grad_norm": 8.927977561950684, "learning_rate": 6.888819371003385e-09, "loss": 0.3295, "step": 19934 }, { "epoch": 2.89, "grad_norm": 7.924546241760254, "learning_rate": 6.870284913504054e-09, "loss": 0.3271, "step": 19935 }, { "epoch": 2.89, "grad_norm": 8.5985107421875, "learning_rate": 6.851775337256116e-09, "loss": 0.3676, "step": 19936 }, { "epoch": 2.89, "grad_norm": 8.172640800476074, "learning_rate": 6.833290642723199e-09, "loss": 0.3369, "step": 19937 }, { "epoch": 2.89, "grad_norm": 8.72879695892334, "learning_rate": 6.8148308303684894e-09, "loss": 0.3121, "step": 19938 }, { "epoch": 2.89, "grad_norm": 8.570998191833496, "learning_rate": 6.796395900654395e-09, "loss": 0.3188, "step": 19939 }, { "epoch": 2.89, "grad_norm": 9.572062492370605, "learning_rate": 6.777985854042878e-09, "loss": 0.3349, "step": 19940 }, { "epoch": 2.89, "grad_norm": 8.943159103393555, "learning_rate": 6.7596006909951265e-09, "loss": 0.3516, "step": 19941 }, { "epoch": 2.89, "grad_norm": 8.35579776763916, "learning_rate": 6.74124041197166e-09, "loss": 0.3511, "step": 19942 }, { "epoch": 2.89, "grad_norm": 8.430213928222656, "learning_rate": 6.722905017432667e-09, "loss": 0.3028, "step": 19943 }, { "epoch": 2.89, "grad_norm": 8.736331939697266, "learning_rate": 6.7045945078372246e-09, "loss": 0.3404, "step": 19944 }, { "epoch": 2.89, "grad_norm": 8.276983261108398, "learning_rate": 6.686308883644298e-09, "loss": 0.353, "step": 19945 }, { "epoch": 2.89, "grad_norm": 8.045205116271973, "learning_rate": 6.668048145311856e-09, "loss": 0.336, "step": 19946 }, { "epoch": 2.89, "grad_norm": 8.018271446228027, "learning_rate": 6.64981229329753e-09, "loss": 0.3686, "step": 19947 }, { "epoch": 2.89, "grad_norm": 8.439992904663086, "learning_rate": 6.6316013280582895e-09, "loss": 0.3598, "step": 19948 }, { "epoch": 2.89, "grad_norm": 8.757218360900879, "learning_rate": 6.613415250049992e-09, "loss": 0.2954, "step": 19949 }, { "epoch": 2.89, "grad_norm": 8.095966339111328, "learning_rate": 6.595254059728606e-09, "loss": 0.3268, "step": 19950 }, { "epoch": 2.89, "grad_norm": 9.195304870605469, "learning_rate": 6.577117757548989e-09, "loss": 0.3401, "step": 19951 }, { "epoch": 2.89, "grad_norm": 8.868273735046387, "learning_rate": 6.559006343965557e-09, "loss": 0.3961, "step": 19952 }, { "epoch": 2.9, "grad_norm": 10.21488094329834, "learning_rate": 6.5409198194320555e-09, "loss": 0.466, "step": 19953 }, { "epoch": 2.9, "grad_norm": 8.29235553741455, "learning_rate": 6.522858184401569e-09, "loss": 0.3568, "step": 19954 }, { "epoch": 2.9, "grad_norm": 8.682379722595215, "learning_rate": 6.504821439326735e-09, "loss": 0.3692, "step": 19955 }, { "epoch": 2.9, "grad_norm": 9.092355728149414, "learning_rate": 6.486809584659192e-09, "loss": 0.3906, "step": 19956 }, { "epoch": 2.9, "grad_norm": 6.448853015899658, "learning_rate": 6.468822620850467e-09, "loss": 0.2658, "step": 19957 }, { "epoch": 2.9, "grad_norm": 8.680220603942871, "learning_rate": 6.450860548350978e-09, "loss": 0.3831, "step": 19958 }, { "epoch": 2.9, "grad_norm": 10.426301956176758, "learning_rate": 6.432923367610921e-09, "loss": 0.3804, "step": 19959 }, { "epoch": 2.9, "grad_norm": 9.649085998535156, "learning_rate": 6.4150110790796015e-09, "loss": 0.3824, "step": 19960 }, { "epoch": 2.9, "grad_norm": 8.364910125732422, "learning_rate": 6.397123683205663e-09, "loss": 0.3517, "step": 19961 }, { "epoch": 2.9, "grad_norm": 7.663186073303223, "learning_rate": 6.379261180437523e-09, "loss": 0.3085, "step": 19962 }, { "epoch": 2.9, "grad_norm": 8.110832214355469, "learning_rate": 6.36142357122238e-09, "loss": 0.3408, "step": 19963 }, { "epoch": 2.9, "grad_norm": 8.973814964294434, "learning_rate": 6.3436108560073194e-09, "loss": 0.3805, "step": 19964 }, { "epoch": 2.9, "grad_norm": 8.224814414978027, "learning_rate": 6.325823035238542e-09, "loss": 0.3531, "step": 19965 }, { "epoch": 2.9, "grad_norm": 8.102692604064941, "learning_rate": 6.308060109361801e-09, "loss": 0.3715, "step": 19966 }, { "epoch": 2.9, "grad_norm": 8.292587280273438, "learning_rate": 6.290322078821963e-09, "loss": 0.3463, "step": 19967 }, { "epoch": 2.9, "grad_norm": 7.907379150390625, "learning_rate": 6.272608944063451e-09, "loss": 0.3539, "step": 19968 }, { "epoch": 2.9, "grad_norm": 11.160233497619629, "learning_rate": 6.2549207055302425e-09, "loss": 0.3976, "step": 19969 }, { "epoch": 2.9, "grad_norm": 8.57232666015625, "learning_rate": 6.237257363665205e-09, "loss": 0.3625, "step": 19970 }, { "epoch": 2.9, "grad_norm": 8.439560890197754, "learning_rate": 6.219618918910985e-09, "loss": 0.3478, "step": 19971 }, { "epoch": 2.9, "grad_norm": 9.237717628479004, "learning_rate": 6.202005371709451e-09, "loss": 0.3832, "step": 19972 }, { "epoch": 2.9, "grad_norm": 8.2882080078125, "learning_rate": 6.184416722502028e-09, "loss": 0.3499, "step": 19973 }, { "epoch": 2.9, "grad_norm": 7.099869251251221, "learning_rate": 6.166852971729253e-09, "loss": 0.328, "step": 19974 }, { "epoch": 2.9, "grad_norm": 9.496164321899414, "learning_rate": 6.149314119831106e-09, "loss": 0.3341, "step": 19975 }, { "epoch": 2.9, "grad_norm": 8.115970611572266, "learning_rate": 6.131800167247125e-09, "loss": 0.3539, "step": 19976 }, { "epoch": 2.9, "grad_norm": 8.548025131225586, "learning_rate": 6.11431111441596e-09, "loss": 0.3076, "step": 19977 }, { "epoch": 2.9, "grad_norm": 8.718576431274414, "learning_rate": 6.096846961775925e-09, "loss": 0.3972, "step": 19978 }, { "epoch": 2.9, "grad_norm": 8.706890106201172, "learning_rate": 6.0794077097644505e-09, "loss": 0.3748, "step": 19979 }, { "epoch": 2.9, "grad_norm": 9.202080726623535, "learning_rate": 6.0619933588184066e-09, "loss": 0.3584, "step": 19980 }, { "epoch": 2.9, "grad_norm": 7.931127548217773, "learning_rate": 6.044603909374113e-09, "loss": 0.2869, "step": 19981 }, { "epoch": 2.9, "grad_norm": 8.283232688903809, "learning_rate": 6.027239361867331e-09, "loss": 0.3629, "step": 19982 }, { "epoch": 2.9, "grad_norm": 8.793482780456543, "learning_rate": 6.0098997167330465e-09, "loss": 0.3523, "step": 19983 }, { "epoch": 2.9, "grad_norm": 9.761896133422852, "learning_rate": 5.992584974405579e-09, "loss": 0.3821, "step": 19984 }, { "epoch": 2.9, "grad_norm": 10.173652648925781, "learning_rate": 5.975295135318914e-09, "loss": 0.4193, "step": 19985 }, { "epoch": 2.9, "grad_norm": 8.807525634765625, "learning_rate": 5.958030199905928e-09, "loss": 0.3365, "step": 19986 }, { "epoch": 2.9, "grad_norm": 7.439445972442627, "learning_rate": 5.940790168599496e-09, "loss": 0.3179, "step": 19987 }, { "epoch": 2.9, "grad_norm": 8.665779113769531, "learning_rate": 5.9235750418313855e-09, "loss": 0.3846, "step": 19988 }, { "epoch": 2.9, "grad_norm": 10.477921485900879, "learning_rate": 5.906384820032806e-09, "loss": 0.3773, "step": 19989 }, { "epoch": 2.9, "grad_norm": 9.040701866149902, "learning_rate": 5.889219503634635e-09, "loss": 0.3132, "step": 19990 }, { "epoch": 2.9, "grad_norm": 9.208222389221191, "learning_rate": 5.872079093066751e-09, "loss": 0.353, "step": 19991 }, { "epoch": 2.9, "grad_norm": 9.013160705566406, "learning_rate": 5.854963588758588e-09, "loss": 0.3136, "step": 19992 }, { "epoch": 2.9, "grad_norm": 8.125649452209473, "learning_rate": 5.837872991139025e-09, "loss": 0.3374, "step": 19993 }, { "epoch": 2.9, "grad_norm": 8.080147743225098, "learning_rate": 5.820807300636277e-09, "loss": 0.3744, "step": 19994 }, { "epoch": 2.9, "grad_norm": 8.24710750579834, "learning_rate": 5.803766517677777e-09, "loss": 0.3594, "step": 19995 }, { "epoch": 2.9, "grad_norm": 7.616650581359863, "learning_rate": 5.786750642690519e-09, "loss": 0.3335, "step": 19996 }, { "epoch": 2.9, "grad_norm": 8.830134391784668, "learning_rate": 5.769759676100827e-09, "loss": 0.3635, "step": 19997 }, { "epoch": 2.9, "grad_norm": 9.256925582885742, "learning_rate": 5.7527936183342505e-09, "loss": 0.3669, "step": 19998 }, { "epoch": 2.9, "grad_norm": 7.409046173095703, "learning_rate": 5.735852469816116e-09, "loss": 0.3391, "step": 19999 }, { "epoch": 2.9, "grad_norm": 8.969095230102539, "learning_rate": 5.718936230970528e-09, "loss": 0.3585, "step": 20000 }, { "epoch": 2.9, "grad_norm": 8.726757049560547, "learning_rate": 5.702044902221481e-09, "loss": 0.3454, "step": 20001 }, { "epoch": 2.9, "grad_norm": 7.562202453613281, "learning_rate": 5.685178483992192e-09, "loss": 0.2974, "step": 20002 }, { "epoch": 2.9, "grad_norm": 9.43780517578125, "learning_rate": 5.668336976705212e-09, "loss": 0.3661, "step": 20003 }, { "epoch": 2.9, "grad_norm": 8.233880043029785, "learning_rate": 5.6515203807823156e-09, "loss": 0.3204, "step": 20004 }, { "epoch": 2.9, "grad_norm": 8.216845512390137, "learning_rate": 5.6347286966449414e-09, "loss": 0.287, "step": 20005 }, { "epoch": 2.9, "grad_norm": 7.283203601837158, "learning_rate": 5.617961924713865e-09, "loss": 0.2967, "step": 20006 }, { "epoch": 2.9, "grad_norm": 8.774360656738281, "learning_rate": 5.601220065408973e-09, "loss": 0.351, "step": 20007 }, { "epoch": 2.9, "grad_norm": 7.688013553619385, "learning_rate": 5.584503119149819e-09, "loss": 0.3527, "step": 20008 }, { "epoch": 2.9, "grad_norm": 7.799510478973389, "learning_rate": 5.5678110863551785e-09, "loss": 0.306, "step": 20009 }, { "epoch": 2.9, "grad_norm": 8.513298988342285, "learning_rate": 5.551143967443273e-09, "loss": 0.3182, "step": 20010 }, { "epoch": 2.9, "grad_norm": 8.030355453491211, "learning_rate": 5.5345017628315445e-09, "loss": 0.3477, "step": 20011 }, { "epoch": 2.9, "grad_norm": 9.455145835876465, "learning_rate": 5.517884472937107e-09, "loss": 0.3221, "step": 20012 }, { "epoch": 2.9, "grad_norm": 9.833154678344727, "learning_rate": 5.5012920981761805e-09, "loss": 0.3096, "step": 20013 }, { "epoch": 2.9, "grad_norm": 8.57872200012207, "learning_rate": 5.4847246389645445e-09, "loss": 0.3904, "step": 20014 }, { "epoch": 2.9, "grad_norm": 8.722546577453613, "learning_rate": 5.4681820957171995e-09, "loss": 0.3739, "step": 20015 }, { "epoch": 2.9, "grad_norm": 8.120562553405762, "learning_rate": 5.4516644688485936e-09, "loss": 0.3815, "step": 20016 }, { "epoch": 2.9, "grad_norm": 11.372751235961914, "learning_rate": 5.435171758772506e-09, "loss": 0.3803, "step": 20017 }, { "epoch": 2.9, "grad_norm": 10.031023979187012, "learning_rate": 5.418703965902382e-09, "loss": 0.2955, "step": 20018 }, { "epoch": 2.9, "grad_norm": 7.3867998123168945, "learning_rate": 5.402261090650339e-09, "loss": 0.2727, "step": 20019 }, { "epoch": 2.9, "grad_norm": 8.588067054748535, "learning_rate": 5.385843133428824e-09, "loss": 0.3321, "step": 20020 }, { "epoch": 2.9, "grad_norm": 8.365233421325684, "learning_rate": 5.369450094648731e-09, "loss": 0.3578, "step": 20021 }, { "epoch": 2.91, "grad_norm": 8.707784652709961, "learning_rate": 5.353081974721174e-09, "loss": 0.3028, "step": 20022 }, { "epoch": 2.91, "grad_norm": 8.620453834533691, "learning_rate": 5.336738774055938e-09, "loss": 0.3345, "step": 20023 }, { "epoch": 2.91, "grad_norm": 7.788532733917236, "learning_rate": 5.320420493062472e-09, "loss": 0.2637, "step": 20024 }, { "epoch": 2.91, "grad_norm": 9.833124160766602, "learning_rate": 5.304127132149783e-09, "loss": 0.383, "step": 20025 }, { "epoch": 2.91, "grad_norm": 9.399999618530273, "learning_rate": 5.287858691725877e-09, "loss": 0.2852, "step": 20026 }, { "epoch": 2.91, "grad_norm": 8.37215805053711, "learning_rate": 5.2716151721985404e-09, "loss": 0.323, "step": 20027 }, { "epoch": 2.91, "grad_norm": 10.060251235961914, "learning_rate": 5.255396573974447e-09, "loss": 0.3397, "step": 20028 }, { "epoch": 2.91, "grad_norm": 9.53069019317627, "learning_rate": 5.239202897460271e-09, "loss": 0.376, "step": 20029 }, { "epoch": 2.91, "grad_norm": 8.272168159484863, "learning_rate": 5.2230341430615776e-09, "loss": 0.3186, "step": 20030 }, { "epoch": 2.91, "grad_norm": 12.457487106323242, "learning_rate": 5.206890311183265e-09, "loss": 0.4497, "step": 20031 }, { "epoch": 2.91, "grad_norm": 8.674087524414062, "learning_rate": 5.190771402230121e-09, "loss": 0.3875, "step": 20032 }, { "epoch": 2.91, "grad_norm": 8.029484748840332, "learning_rate": 5.174677416605711e-09, "loss": 0.3046, "step": 20033 }, { "epoch": 2.91, "grad_norm": 7.594504356384277, "learning_rate": 5.158608354713268e-09, "loss": 0.3103, "step": 20034 }, { "epoch": 2.91, "grad_norm": 7.725071907043457, "learning_rate": 5.142564216955581e-09, "loss": 0.3022, "step": 20035 }, { "epoch": 2.91, "grad_norm": 8.165740013122559, "learning_rate": 5.126545003734328e-09, "loss": 0.3293, "step": 20036 }, { "epoch": 2.91, "grad_norm": 6.8902974128723145, "learning_rate": 5.110550715451079e-09, "loss": 0.2923, "step": 20037 }, { "epoch": 2.91, "grad_norm": 8.559386253356934, "learning_rate": 5.09458135250651e-09, "loss": 0.3876, "step": 20038 }, { "epoch": 2.91, "grad_norm": 9.347488403320312, "learning_rate": 5.078636915300638e-09, "loss": 0.3525, "step": 20039 }, { "epoch": 2.91, "grad_norm": 9.691479682922363, "learning_rate": 5.062717404232808e-09, "loss": 0.3334, "step": 20040 }, { "epoch": 2.91, "grad_norm": 9.296472549438477, "learning_rate": 5.046822819702145e-09, "loss": 0.3284, "step": 20041 }, { "epoch": 2.91, "grad_norm": 10.008185386657715, "learning_rate": 5.030953162106555e-09, "loss": 0.3663, "step": 20042 }, { "epoch": 2.91, "grad_norm": 8.712240219116211, "learning_rate": 5.015108431843939e-09, "loss": 0.3852, "step": 20043 }, { "epoch": 2.91, "grad_norm": 9.066564559936523, "learning_rate": 4.999288629311094e-09, "loss": 0.3129, "step": 20044 }, { "epoch": 2.91, "grad_norm": 9.318553924560547, "learning_rate": 4.9834937549042555e-09, "loss": 0.3669, "step": 20045 }, { "epoch": 2.91, "grad_norm": 7.973049163818359, "learning_rate": 4.967723809019331e-09, "loss": 0.3215, "step": 20046 }, { "epoch": 2.91, "grad_norm": 9.647229194641113, "learning_rate": 4.951978792051225e-09, "loss": 0.3591, "step": 20047 }, { "epoch": 2.91, "grad_norm": 8.946236610412598, "learning_rate": 4.9362587043946246e-09, "loss": 0.3344, "step": 20048 }, { "epoch": 2.91, "grad_norm": 11.154779434204102, "learning_rate": 4.920563546443212e-09, "loss": 0.3556, "step": 20049 }, { "epoch": 2.91, "grad_norm": 9.12420654296875, "learning_rate": 4.904893318590231e-09, "loss": 0.3356, "step": 20050 }, { "epoch": 2.91, "grad_norm": 8.813653945922852, "learning_rate": 4.889248021228254e-09, "loss": 0.3927, "step": 20051 }, { "epoch": 2.91, "grad_norm": 8.43447208404541, "learning_rate": 4.873627654749301e-09, "loss": 0.3261, "step": 20052 }, { "epoch": 2.91, "grad_norm": 7.431904315948486, "learning_rate": 4.858032219544728e-09, "loss": 0.3092, "step": 20053 }, { "epoch": 2.91, "grad_norm": 7.884160041809082, "learning_rate": 4.84246171600522e-09, "loss": 0.3595, "step": 20054 }, { "epoch": 2.91, "grad_norm": 9.229177474975586, "learning_rate": 4.826916144520909e-09, "loss": 0.3301, "step": 20055 }, { "epoch": 2.91, "grad_norm": 9.205241203308105, "learning_rate": 4.811395505481153e-09, "loss": 0.3476, "step": 20056 }, { "epoch": 2.91, "grad_norm": 7.442590236663818, "learning_rate": 4.79589979927486e-09, "loss": 0.3248, "step": 20057 }, { "epoch": 2.91, "grad_norm": 9.21390438079834, "learning_rate": 4.780429026290389e-09, "loss": 0.3281, "step": 20058 }, { "epoch": 2.91, "grad_norm": 9.802225112915039, "learning_rate": 4.764983186915095e-09, "loss": 0.3443, "step": 20059 }, { "epoch": 2.91, "grad_norm": 8.573139190673828, "learning_rate": 4.7495622815361126e-09, "loss": 0.3572, "step": 20060 }, { "epoch": 2.91, "grad_norm": 9.122749328613281, "learning_rate": 4.734166310539689e-09, "loss": 0.394, "step": 20061 }, { "epoch": 2.91, "grad_norm": 8.020800590515137, "learning_rate": 4.7187952743116265e-09, "loss": 0.305, "step": 20062 }, { "epoch": 2.91, "grad_norm": 9.3118896484375, "learning_rate": 4.703449173236951e-09, "loss": 0.3555, "step": 20063 }, { "epoch": 2.91, "grad_norm": 8.871417999267578, "learning_rate": 4.688128007700243e-09, "loss": 0.3726, "step": 20064 }, { "epoch": 2.91, "grad_norm": 7.8895463943481445, "learning_rate": 4.6728317780851955e-09, "loss": 0.3317, "step": 20065 }, { "epoch": 2.91, "grad_norm": 9.519194602966309, "learning_rate": 4.657560484775169e-09, "loss": 0.3015, "step": 20066 }, { "epoch": 2.91, "grad_norm": 9.011062622070312, "learning_rate": 4.642314128152636e-09, "loss": 0.2951, "step": 20067 }, { "epoch": 2.91, "grad_norm": 8.51343059539795, "learning_rate": 4.627092708599622e-09, "loss": 0.3642, "step": 20068 }, { "epoch": 2.91, "grad_norm": 9.685343742370605, "learning_rate": 4.611896226497603e-09, "loss": 0.3331, "step": 20069 }, { "epoch": 2.91, "grad_norm": 7.845438003540039, "learning_rate": 4.59672468222716e-09, "loss": 0.3202, "step": 20070 }, { "epoch": 2.91, "grad_norm": 7.896834373474121, "learning_rate": 4.581578076168324e-09, "loss": 0.3458, "step": 20071 }, { "epoch": 2.91, "grad_norm": 8.680277824401855, "learning_rate": 4.566456408700792e-09, "loss": 0.3197, "step": 20072 }, { "epoch": 2.91, "grad_norm": 10.17027473449707, "learning_rate": 4.551359680203149e-09, "loss": 0.3811, "step": 20073 }, { "epoch": 2.91, "grad_norm": 7.398698329925537, "learning_rate": 4.53628789105387e-09, "loss": 0.3177, "step": 20074 }, { "epoch": 2.91, "grad_norm": 8.249935150146484, "learning_rate": 4.521241041630319e-09, "loss": 0.3004, "step": 20075 }, { "epoch": 2.91, "grad_norm": 8.923727989196777, "learning_rate": 4.506219132309752e-09, "loss": 0.4097, "step": 20076 }, { "epoch": 2.91, "grad_norm": 9.403091430664062, "learning_rate": 4.4912221634682e-09, "loss": 0.3739, "step": 20077 }, { "epoch": 2.91, "grad_norm": 8.444719314575195, "learning_rate": 4.476250135481696e-09, "loss": 0.3372, "step": 20078 }, { "epoch": 2.91, "grad_norm": 10.97121524810791, "learning_rate": 4.4613030487251625e-09, "loss": 0.4332, "step": 20079 }, { "epoch": 2.91, "grad_norm": 8.17302417755127, "learning_rate": 4.446380903572966e-09, "loss": 0.3659, "step": 20080 }, { "epoch": 2.91, "grad_norm": 8.952252388000488, "learning_rate": 4.431483700399252e-09, "loss": 0.402, "step": 20081 }, { "epoch": 2.91, "grad_norm": 8.080514907836914, "learning_rate": 4.416611439576945e-09, "loss": 0.3021, "step": 20082 }, { "epoch": 2.91, "grad_norm": 9.516528129577637, "learning_rate": 4.401764121478857e-09, "loss": 0.3497, "step": 20083 }, { "epoch": 2.91, "grad_norm": 7.71684455871582, "learning_rate": 4.386941746476802e-09, "loss": 0.3448, "step": 20084 }, { "epoch": 2.91, "grad_norm": 8.049617767333984, "learning_rate": 4.372144314942372e-09, "loss": 0.3325, "step": 20085 }, { "epoch": 2.91, "grad_norm": 8.752511978149414, "learning_rate": 4.357371827246048e-09, "loss": 0.3307, "step": 20086 }, { "epoch": 2.91, "grad_norm": 7.827094554901123, "learning_rate": 4.342624283758089e-09, "loss": 0.3376, "step": 20087 }, { "epoch": 2.91, "grad_norm": 9.498705863952637, "learning_rate": 4.327901684847868e-09, "loss": 0.3268, "step": 20088 }, { "epoch": 2.91, "grad_norm": 8.798917770385742, "learning_rate": 4.3132040308841985e-09, "loss": 0.3774, "step": 20089 }, { "epoch": 2.91, "grad_norm": 8.901571273803711, "learning_rate": 4.298531322235455e-09, "loss": 0.3555, "step": 20090 }, { "epoch": 2.92, "grad_norm": 8.242776870727539, "learning_rate": 4.28388355926923e-09, "loss": 0.3273, "step": 20091 }, { "epoch": 2.92, "grad_norm": 10.402236938476562, "learning_rate": 4.269260742352343e-09, "loss": 0.4082, "step": 20092 }, { "epoch": 2.92, "grad_norm": 8.444494247436523, "learning_rate": 4.254662871851278e-09, "loss": 0.3694, "step": 20093 }, { "epoch": 2.92, "grad_norm": 8.131364822387695, "learning_rate": 4.240089948131631e-09, "loss": 0.3764, "step": 20094 }, { "epoch": 2.92, "grad_norm": 8.693418502807617, "learning_rate": 4.225541971558777e-09, "loss": 0.3116, "step": 20095 }, { "epoch": 2.92, "grad_norm": 8.453374862670898, "learning_rate": 4.211018942496758e-09, "loss": 0.3628, "step": 20096 }, { "epoch": 2.92, "grad_norm": 9.244515419006348, "learning_rate": 4.196520861309838e-09, "loss": 0.3787, "step": 20097 }, { "epoch": 2.92, "grad_norm": 8.069207191467285, "learning_rate": 4.18204772836106e-09, "loss": 0.3378, "step": 20098 }, { "epoch": 2.92, "grad_norm": 9.736974716186523, "learning_rate": 4.167599544013023e-09, "loss": 0.4209, "step": 20099 }, { "epoch": 2.92, "grad_norm": 8.550612449645996, "learning_rate": 4.153176308627771e-09, "loss": 0.3315, "step": 20100 }, { "epoch": 2.92, "grad_norm": 8.494158744812012, "learning_rate": 4.138778022566569e-09, "loss": 0.3737, "step": 20101 }, { "epoch": 2.92, "grad_norm": 8.492968559265137, "learning_rate": 4.1244046861901305e-09, "loss": 0.3311, "step": 20102 }, { "epoch": 2.92, "grad_norm": 8.344034194946289, "learning_rate": 4.110056299858611e-09, "loss": 0.3749, "step": 20103 }, { "epoch": 2.92, "grad_norm": 8.529142379760742, "learning_rate": 4.0957328639315e-09, "loss": 0.3091, "step": 20104 }, { "epoch": 2.92, "grad_norm": 8.389962196350098, "learning_rate": 4.0814343787676234e-09, "loss": 0.3826, "step": 20105 }, { "epoch": 2.92, "grad_norm": 7.282034397125244, "learning_rate": 4.067160844725248e-09, "loss": 0.338, "step": 20106 }, { "epoch": 2.92, "grad_norm": 9.46633529663086, "learning_rate": 4.052912262161978e-09, "loss": 0.4175, "step": 20107 }, { "epoch": 2.92, "grad_norm": 8.655783653259277, "learning_rate": 4.038688631434639e-09, "loss": 0.4322, "step": 20108 }, { "epoch": 2.92, "grad_norm": 10.418702125549316, "learning_rate": 4.024489952899723e-09, "loss": 0.4262, "step": 20109 }, { "epoch": 2.92, "grad_norm": 9.161849021911621, "learning_rate": 4.0103162269129465e-09, "loss": 0.3479, "step": 20110 }, { "epoch": 2.92, "grad_norm": 8.294754028320312, "learning_rate": 3.996167453829358e-09, "loss": 0.3518, "step": 20111 }, { "epoch": 2.92, "grad_norm": 8.089214324951172, "learning_rate": 3.9820436340034515e-09, "loss": 0.3228, "step": 20112 }, { "epoch": 2.92, "grad_norm": 8.790077209472656, "learning_rate": 3.967944767789166e-09, "loss": 0.3906, "step": 20113 }, { "epoch": 2.92, "grad_norm": 9.405752182006836, "learning_rate": 3.953870855539554e-09, "loss": 0.3446, "step": 20114 }, { "epoch": 2.92, "grad_norm": 8.905959129333496, "learning_rate": 3.939821897607332e-09, "loss": 0.3509, "step": 20115 }, { "epoch": 2.92, "grad_norm": 8.650548934936523, "learning_rate": 3.92579789434444e-09, "loss": 0.3657, "step": 20116 }, { "epoch": 2.92, "grad_norm": 8.04150104522705, "learning_rate": 3.911798846102155e-09, "loss": 0.362, "step": 20117 }, { "epoch": 2.92, "grad_norm": 9.451637268066406, "learning_rate": 3.897824753231416e-09, "loss": 0.376, "step": 20118 }, { "epoch": 2.92, "grad_norm": 8.770472526550293, "learning_rate": 3.883875616082055e-09, "loss": 0.3661, "step": 20119 }, { "epoch": 2.92, "grad_norm": 8.55302619934082, "learning_rate": 3.869951435003793e-09, "loss": 0.3529, "step": 20120 }, { "epoch": 2.92, "grad_norm": 9.443482398986816, "learning_rate": 3.856052210345351e-09, "loss": 0.3222, "step": 20121 }, { "epoch": 2.92, "grad_norm": 8.22122859954834, "learning_rate": 3.842177942454894e-09, "loss": 0.3034, "step": 20122 }, { "epoch": 2.92, "grad_norm": 8.746180534362793, "learning_rate": 3.828328631680033e-09, "loss": 0.381, "step": 20123 }, { "epoch": 2.92, "grad_norm": 8.486852645874023, "learning_rate": 3.814504278367825e-09, "loss": 0.3666, "step": 20124 }, { "epoch": 2.92, "grad_norm": 7.556735992431641, "learning_rate": 3.800704882864658e-09, "loss": 0.3451, "step": 20125 }, { "epoch": 2.92, "grad_norm": 9.218480110168457, "learning_rate": 3.7869304455161455e-09, "loss": 0.3809, "step": 20126 }, { "epoch": 2.92, "grad_norm": 9.001360893249512, "learning_rate": 3.773180966667344e-09, "loss": 0.3283, "step": 20127 }, { "epoch": 2.92, "grad_norm": 8.550420761108398, "learning_rate": 3.759456446662868e-09, "loss": 0.3183, "step": 20128 }, { "epoch": 2.92, "grad_norm": 7.814041614532471, "learning_rate": 3.7457568858464405e-09, "loss": 0.4018, "step": 20129 }, { "epoch": 2.92, "grad_norm": 7.285945892333984, "learning_rate": 3.732082284561344e-09, "loss": 0.3268, "step": 20130 }, { "epoch": 2.92, "grad_norm": 7.9337849617004395, "learning_rate": 3.7184326431501934e-09, "loss": 0.3455, "step": 20131 }, { "epoch": 2.92, "grad_norm": 7.478263854980469, "learning_rate": 3.7048079619549365e-09, "loss": 0.2978, "step": 20132 }, { "epoch": 2.92, "grad_norm": 9.735589981079102, "learning_rate": 3.6912082413168568e-09, "loss": 0.3895, "step": 20133 }, { "epoch": 2.92, "grad_norm": 9.237645149230957, "learning_rate": 3.6776334815766807e-09, "loss": 0.3872, "step": 20134 }, { "epoch": 2.92, "grad_norm": 8.452238082885742, "learning_rate": 3.6640836830746923e-09, "loss": 0.3507, "step": 20135 }, { "epoch": 2.92, "grad_norm": 8.908839225769043, "learning_rate": 3.6505588461500648e-09, "loss": 0.4021, "step": 20136 }, { "epoch": 2.92, "grad_norm": 9.171591758728027, "learning_rate": 3.63705897114186e-09, "loss": 0.3859, "step": 20137 }, { "epoch": 2.92, "grad_norm": 8.353738784790039, "learning_rate": 3.6235840583882517e-09, "loss": 0.3478, "step": 20138 }, { "epoch": 2.92, "grad_norm": 9.237553596496582, "learning_rate": 3.610134108226748e-09, "loss": 0.3472, "step": 20139 }, { "epoch": 2.92, "grad_norm": 9.428351402282715, "learning_rate": 3.5967091209944124e-09, "loss": 0.3594, "step": 20140 }, { "epoch": 2.92, "grad_norm": 8.826953887939453, "learning_rate": 3.5833090970275317e-09, "loss": 0.3841, "step": 20141 }, { "epoch": 2.92, "grad_norm": 9.828492164611816, "learning_rate": 3.569934036661837e-09, "loss": 0.368, "step": 20142 }, { "epoch": 2.92, "grad_norm": 7.366027355194092, "learning_rate": 3.556583940232394e-09, "loss": 0.302, "step": 20143 }, { "epoch": 2.92, "grad_norm": 8.068767547607422, "learning_rate": 3.5432588080736013e-09, "loss": 0.4055, "step": 20144 }, { "epoch": 2.92, "grad_norm": 9.271307945251465, "learning_rate": 3.529958640519526e-09, "loss": 0.4197, "step": 20145 }, { "epoch": 2.92, "grad_norm": 9.180188179016113, "learning_rate": 3.516683437903234e-09, "loss": 0.3633, "step": 20146 }, { "epoch": 2.92, "grad_norm": 9.575156211853027, "learning_rate": 3.5034332005572376e-09, "loss": 0.3755, "step": 20147 }, { "epoch": 2.92, "grad_norm": 9.077496528625488, "learning_rate": 3.4902079288136044e-09, "loss": 0.3969, "step": 20148 }, { "epoch": 2.92, "grad_norm": 7.964131832122803, "learning_rate": 3.477007623003736e-09, "loss": 0.3885, "step": 20149 }, { "epoch": 2.92, "grad_norm": 9.034737586975098, "learning_rate": 3.4638322834582567e-09, "loss": 0.3625, "step": 20150 }, { "epoch": 2.92, "grad_norm": 8.865622520446777, "learning_rate": 3.450681910507236e-09, "loss": 0.3344, "step": 20151 }, { "epoch": 2.92, "grad_norm": 7.702829360961914, "learning_rate": 3.4375565044800768e-09, "loss": 0.3431, "step": 20152 }, { "epoch": 2.92, "grad_norm": 8.810455322265625, "learning_rate": 3.4244560657057386e-09, "loss": 0.3624, "step": 20153 }, { "epoch": 2.92, "grad_norm": 8.238052368164062, "learning_rate": 3.4113805945124032e-09, "loss": 0.3658, "step": 20154 }, { "epoch": 2.92, "grad_norm": 7.867082595825195, "learning_rate": 3.398330091227586e-09, "loss": 0.3048, "step": 20155 }, { "epoch": 2.92, "grad_norm": 8.987425804138184, "learning_rate": 3.3853045561784698e-09, "loss": 0.4161, "step": 20156 }, { "epoch": 2.92, "grad_norm": 8.050863265991211, "learning_rate": 3.3723039896910164e-09, "loss": 0.3355, "step": 20157 }, { "epoch": 2.92, "grad_norm": 8.843354225158691, "learning_rate": 3.3593283920912986e-09, "loss": 0.3568, "step": 20158 }, { "epoch": 2.92, "grad_norm": 8.223228454589844, "learning_rate": 3.3463777637041666e-09, "loss": 0.3794, "step": 20159 }, { "epoch": 2.93, "grad_norm": 9.558349609375, "learning_rate": 3.3334521048541396e-09, "loss": 0.3331, "step": 20160 }, { "epoch": 2.93, "grad_norm": 8.552450180053711, "learning_rate": 3.3205514158650695e-09, "loss": 0.3337, "step": 20161 }, { "epoch": 2.93, "grad_norm": 9.59184455871582, "learning_rate": 3.3076756970601416e-09, "loss": 0.3697, "step": 20162 }, { "epoch": 2.93, "grad_norm": 8.339274406433105, "learning_rate": 3.2948249487619872e-09, "loss": 0.3583, "step": 20163 }, { "epoch": 2.93, "grad_norm": 9.22200870513916, "learning_rate": 3.28199917129246e-09, "loss": 0.4196, "step": 20164 }, { "epoch": 2.93, "grad_norm": 8.854286193847656, "learning_rate": 3.2691983649729693e-09, "loss": 0.3657, "step": 20165 }, { "epoch": 2.93, "grad_norm": 8.406228065490723, "learning_rate": 3.2564225301241475e-09, "loss": 0.3579, "step": 20166 }, { "epoch": 2.93, "grad_norm": 8.47177791595459, "learning_rate": 3.2436716670661835e-09, "loss": 0.402, "step": 20167 }, { "epoch": 2.93, "grad_norm": 8.357810974121094, "learning_rate": 3.2309457761184878e-09, "loss": 0.3777, "step": 20168 }, { "epoch": 2.93, "grad_norm": 8.45618724822998, "learning_rate": 3.2182448575998056e-09, "loss": 0.3375, "step": 20169 }, { "epoch": 2.93, "grad_norm": 8.538148880004883, "learning_rate": 3.205568911828438e-09, "loss": 0.3286, "step": 20170 }, { "epoch": 2.93, "grad_norm": 8.76828670501709, "learning_rate": 3.1929179391217973e-09, "loss": 0.3789, "step": 20171 }, { "epoch": 2.93, "grad_norm": 9.545645713806152, "learning_rate": 3.180291939797186e-09, "loss": 0.3668, "step": 20172 }, { "epoch": 2.93, "grad_norm": 8.267563819885254, "learning_rate": 3.1676909141704622e-09, "loss": 0.3332, "step": 20173 }, { "epoch": 2.93, "grad_norm": 8.054540634155273, "learning_rate": 3.155114862557706e-09, "loss": 0.3468, "step": 20174 }, { "epoch": 2.93, "grad_norm": 7.8692755699157715, "learning_rate": 3.1425637852737775e-09, "loss": 0.3237, "step": 20175 }, { "epoch": 2.93, "grad_norm": 8.570859909057617, "learning_rate": 3.130037682633313e-09, "loss": 0.3407, "step": 20176 }, { "epoch": 2.93, "grad_norm": 9.67074966430664, "learning_rate": 3.117536554949951e-09, "loss": 0.3355, "step": 20177 }, { "epoch": 2.93, "grad_norm": 9.8085298538208, "learning_rate": 3.1050604025368853e-09, "loss": 0.3634, "step": 20178 }, { "epoch": 2.93, "grad_norm": 9.019221305847168, "learning_rate": 3.092609225706755e-09, "loss": 0.3837, "step": 20179 }, { "epoch": 2.93, "grad_norm": 9.357522010803223, "learning_rate": 3.080183024771532e-09, "loss": 0.3198, "step": 20180 }, { "epoch": 2.93, "grad_norm": 9.374055862426758, "learning_rate": 3.0677818000425238e-09, "loss": 0.4049, "step": 20181 }, { "epoch": 2.93, "grad_norm": 8.374857902526855, "learning_rate": 3.05540555183037e-09, "loss": 0.3328, "step": 20182 }, { "epoch": 2.93, "grad_norm": 9.31525707244873, "learning_rate": 3.0430542804451565e-09, "loss": 0.3158, "step": 20183 }, { "epoch": 2.93, "grad_norm": 9.664253234863281, "learning_rate": 3.030727986196302e-09, "loss": 0.3424, "step": 20184 }, { "epoch": 2.93, "grad_norm": 8.204623222351074, "learning_rate": 3.018426669392671e-09, "loss": 0.3099, "step": 20185 }, { "epoch": 2.93, "grad_norm": 7.851303577423096, "learning_rate": 3.0061503303424607e-09, "loss": 0.349, "step": 20186 }, { "epoch": 2.93, "grad_norm": 8.848959922790527, "learning_rate": 2.9938989693532034e-09, "loss": 0.3648, "step": 20187 }, { "epoch": 2.93, "grad_norm": 8.395263671875, "learning_rate": 2.981672586731876e-09, "loss": 0.3318, "step": 20188 }, { "epoch": 2.93, "grad_norm": 9.56800651550293, "learning_rate": 2.9694711827846774e-09, "loss": 0.3796, "step": 20189 }, { "epoch": 2.93, "grad_norm": 8.492724418640137, "learning_rate": 2.9572947578173635e-09, "loss": 0.336, "step": 20190 }, { "epoch": 2.93, "grad_norm": 9.240606307983398, "learning_rate": 2.9451433121351344e-09, "loss": 0.3404, "step": 20191 }, { "epoch": 2.93, "grad_norm": 8.638585090637207, "learning_rate": 2.933016846042191e-09, "loss": 0.3584, "step": 20192 }, { "epoch": 2.93, "grad_norm": 9.089886665344238, "learning_rate": 2.920915359842513e-09, "loss": 0.3445, "step": 20193 }, { "epoch": 2.93, "grad_norm": 8.102555274963379, "learning_rate": 2.9088388538391906e-09, "loss": 0.3247, "step": 20194 }, { "epoch": 2.93, "grad_norm": 8.343091011047363, "learning_rate": 2.89678732833476e-09, "loss": 0.3216, "step": 20195 }, { "epoch": 2.93, "grad_norm": 9.323348999023438, "learning_rate": 2.8847607836312017e-09, "loss": 0.3391, "step": 20196 }, { "epoch": 2.93, "grad_norm": 9.47842788696289, "learning_rate": 2.872759220029941e-09, "loss": 0.3775, "step": 20197 }, { "epoch": 2.93, "grad_norm": 8.52338695526123, "learning_rate": 2.860782637831405e-09, "loss": 0.3556, "step": 20198 }, { "epoch": 2.93, "grad_norm": 9.055590629577637, "learning_rate": 2.8488310373357973e-09, "loss": 0.3567, "step": 20199 }, { "epoch": 2.93, "grad_norm": 7.852649688720703, "learning_rate": 2.836904418842545e-09, "loss": 0.3501, "step": 20200 }, { "epoch": 2.93, "grad_norm": 9.766072273254395, "learning_rate": 2.825002782650521e-09, "loss": 0.3914, "step": 20201 }, { "epoch": 2.93, "grad_norm": 8.344741821289062, "learning_rate": 2.813126129057708e-09, "loss": 0.3075, "step": 20202 }, { "epoch": 2.93, "grad_norm": 10.404101371765137, "learning_rate": 2.8012744583617574e-09, "loss": 0.3604, "step": 20203 }, { "epoch": 2.93, "grad_norm": 8.027653694152832, "learning_rate": 2.7894477708596544e-09, "loss": 0.3247, "step": 20204 }, { "epoch": 2.93, "grad_norm": 8.755202293395996, "learning_rate": 2.7776460668476055e-09, "loss": 0.3132, "step": 20205 }, { "epoch": 2.93, "grad_norm": 8.488409996032715, "learning_rate": 2.7658693466213745e-09, "loss": 0.3325, "step": 20206 }, { "epoch": 2.93, "grad_norm": 8.202797889709473, "learning_rate": 2.7541176104758368e-09, "loss": 0.3884, "step": 20207 }, { "epoch": 2.93, "grad_norm": 8.676390647888184, "learning_rate": 2.7423908587056453e-09, "loss": 0.3339, "step": 20208 }, { "epoch": 2.93, "grad_norm": 8.808860778808594, "learning_rate": 2.7306890916044546e-09, "loss": 0.3681, "step": 20209 }, { "epoch": 2.93, "grad_norm": 7.528934478759766, "learning_rate": 2.719012309465363e-09, "loss": 0.3077, "step": 20210 }, { "epoch": 2.93, "grad_norm": 9.572096824645996, "learning_rate": 2.707360512581136e-09, "loss": 0.4041, "step": 20211 }, { "epoch": 2.93, "grad_norm": 9.023865699768066, "learning_rate": 2.6957337012435412e-09, "loss": 0.3851, "step": 20212 }, { "epoch": 2.93, "grad_norm": 9.065759658813477, "learning_rate": 2.6841318757437893e-09, "loss": 0.4076, "step": 20213 }, { "epoch": 2.93, "grad_norm": 8.891166687011719, "learning_rate": 2.6725550363727588e-09, "loss": 0.3649, "step": 20214 }, { "epoch": 2.93, "grad_norm": 9.240118980407715, "learning_rate": 2.6610031834202184e-09, "loss": 0.3642, "step": 20215 }, { "epoch": 2.93, "grad_norm": 8.36821460723877, "learning_rate": 2.649476317175936e-09, "loss": 0.3524, "step": 20216 }, { "epoch": 2.93, "grad_norm": 7.974249362945557, "learning_rate": 2.6379744379283473e-09, "loss": 0.2979, "step": 20217 }, { "epoch": 2.93, "grad_norm": 9.399697303771973, "learning_rate": 2.626497545965889e-09, "loss": 0.3394, "step": 20218 }, { "epoch": 2.93, "grad_norm": 7.82094144821167, "learning_rate": 2.6150456415758858e-09, "loss": 0.3158, "step": 20219 }, { "epoch": 2.93, "grad_norm": 8.598540306091309, "learning_rate": 2.6036187250453313e-09, "loss": 0.3371, "step": 20220 }, { "epoch": 2.93, "grad_norm": 8.391217231750488, "learning_rate": 2.5922167966605513e-09, "loss": 0.3562, "step": 20221 }, { "epoch": 2.93, "grad_norm": 8.471540451049805, "learning_rate": 2.5808398567072063e-09, "loss": 0.302, "step": 20222 }, { "epoch": 2.93, "grad_norm": 8.77519416809082, "learning_rate": 2.5694879054701803e-09, "loss": 0.3447, "step": 20223 }, { "epoch": 2.93, "grad_norm": 8.50700855255127, "learning_rate": 2.5581609432340225e-09, "loss": 0.3196, "step": 20224 }, { "epoch": 2.93, "grad_norm": 8.22362995147705, "learning_rate": 2.546858970282506e-09, "loss": 0.3569, "step": 20225 }, { "epoch": 2.93, "grad_norm": 8.512346267700195, "learning_rate": 2.535581986898738e-09, "loss": 0.3219, "step": 20226 }, { "epoch": 2.93, "grad_norm": 9.356271743774414, "learning_rate": 2.5243299933651596e-09, "loss": 0.3207, "step": 20227 }, { "epoch": 2.93, "grad_norm": 10.67812442779541, "learning_rate": 2.5131029899638777e-09, "loss": 0.4356, "step": 20228 }, { "epoch": 2.94, "grad_norm": 9.843085289001465, "learning_rate": 2.5019009769760014e-09, "loss": 0.3693, "step": 20229 }, { "epoch": 2.94, "grad_norm": 8.964570999145508, "learning_rate": 2.4907239546823054e-09, "loss": 0.3604, "step": 20230 }, { "epoch": 2.94, "grad_norm": 9.311117172241211, "learning_rate": 2.4795719233625668e-09, "loss": 0.3497, "step": 20231 }, { "epoch": 2.94, "grad_norm": 7.536618709564209, "learning_rate": 2.468444883296561e-09, "loss": 0.3437, "step": 20232 }, { "epoch": 2.94, "grad_norm": 9.892539978027344, "learning_rate": 2.4573428347627324e-09, "loss": 0.3818, "step": 20233 }, { "epoch": 2.94, "grad_norm": 9.46091365814209, "learning_rate": 2.4462657780393027e-09, "loss": 0.3696, "step": 20234 }, { "epoch": 2.94, "grad_norm": 9.34397029876709, "learning_rate": 2.4352137134039385e-09, "loss": 0.3887, "step": 20235 }, { "epoch": 2.94, "grad_norm": 8.524577140808105, "learning_rate": 2.4241866411333078e-09, "loss": 0.3628, "step": 20236 }, { "epoch": 2.94, "grad_norm": 7.606689929962158, "learning_rate": 2.413184561503856e-09, "loss": 0.2889, "step": 20237 }, { "epoch": 2.94, "grad_norm": 8.512388229370117, "learning_rate": 2.402207474791251e-09, "loss": 0.3243, "step": 20238 }, { "epoch": 2.94, "grad_norm": 8.618144989013672, "learning_rate": 2.391255381270274e-09, "loss": 0.3586, "step": 20239 }, { "epoch": 2.94, "grad_norm": 8.665462493896484, "learning_rate": 2.3803282812154823e-09, "loss": 0.3567, "step": 20240 }, { "epoch": 2.94, "grad_norm": 8.239338874816895, "learning_rate": 2.369426174900657e-09, "loss": 0.3369, "step": 20241 }, { "epoch": 2.94, "grad_norm": 8.088150024414062, "learning_rate": 2.3585490625988024e-09, "loss": 0.3407, "step": 20242 }, { "epoch": 2.94, "grad_norm": 8.116708755493164, "learning_rate": 2.347696944582589e-09, "loss": 0.3823, "step": 20243 }, { "epoch": 2.94, "grad_norm": 8.552990913391113, "learning_rate": 2.336869821123688e-09, "loss": 0.3265, "step": 20244 }, { "epoch": 2.94, "grad_norm": 7.84351110458374, "learning_rate": 2.3260676924935495e-09, "loss": 0.36, "step": 20245 }, { "epoch": 2.94, "grad_norm": 8.466452598571777, "learning_rate": 2.3152905589627347e-09, "loss": 0.3555, "step": 20246 }, { "epoch": 2.94, "grad_norm": 7.982863903045654, "learning_rate": 2.3045384208013606e-09, "loss": 0.3205, "step": 20247 }, { "epoch": 2.94, "grad_norm": 8.579256057739258, "learning_rate": 2.2938112782785456e-09, "loss": 0.3267, "step": 20248 }, { "epoch": 2.94, "grad_norm": 8.52906608581543, "learning_rate": 2.2831091316631856e-09, "loss": 0.326, "step": 20249 }, { "epoch": 2.94, "grad_norm": 9.888943672180176, "learning_rate": 2.2724319812235105e-09, "loss": 0.386, "step": 20250 }, { "epoch": 2.94, "grad_norm": 8.884238243103027, "learning_rate": 2.2617798272268616e-09, "loss": 0.38, "step": 20251 }, { "epoch": 2.94, "grad_norm": 9.064797401428223, "learning_rate": 2.2511526699401375e-09, "loss": 0.3017, "step": 20252 }, { "epoch": 2.94, "grad_norm": 8.708942413330078, "learning_rate": 2.2405505096296796e-09, "loss": 0.2973, "step": 20253 }, { "epoch": 2.94, "grad_norm": 9.245223045349121, "learning_rate": 2.2299733465609426e-09, "loss": 0.3639, "step": 20254 }, { "epoch": 2.94, "grad_norm": 10.67519474029541, "learning_rate": 2.219421180999048e-09, "loss": 0.3741, "step": 20255 }, { "epoch": 2.94, "grad_norm": 8.294839859008789, "learning_rate": 2.20889401320834e-09, "loss": 0.3441, "step": 20256 }, { "epoch": 2.94, "grad_norm": 8.774887084960938, "learning_rate": 2.198391843452607e-09, "loss": 0.3724, "step": 20257 }, { "epoch": 2.94, "grad_norm": 8.066263198852539, "learning_rate": 2.1879146719948615e-09, "loss": 0.2851, "step": 20258 }, { "epoch": 2.94, "grad_norm": 9.032087326049805, "learning_rate": 2.177462499097671e-09, "loss": 0.3699, "step": 20259 }, { "epoch": 2.94, "grad_norm": 8.909333229064941, "learning_rate": 2.167035325022826e-09, "loss": 0.353, "step": 20260 }, { "epoch": 2.94, "grad_norm": 8.172605514526367, "learning_rate": 2.1566331500315615e-09, "loss": 0.3604, "step": 20261 }, { "epoch": 2.94, "grad_norm": 7.54274845123291, "learning_rate": 2.14625597438467e-09, "loss": 0.3274, "step": 20262 }, { "epoch": 2.94, "grad_norm": 8.935785293579102, "learning_rate": 2.1359037983418317e-09, "loss": 0.2706, "step": 20263 }, { "epoch": 2.94, "grad_norm": 8.446493148803711, "learning_rate": 2.125576622162728e-09, "loss": 0.3196, "step": 20264 }, { "epoch": 2.94, "grad_norm": 8.26922607421875, "learning_rate": 2.115274446105819e-09, "loss": 0.3194, "step": 20265 }, { "epoch": 2.94, "grad_norm": 9.019730567932129, "learning_rate": 2.104997270429343e-09, "loss": 0.4313, "step": 20266 }, { "epoch": 2.94, "grad_norm": 8.80567455291748, "learning_rate": 2.09474509539076e-09, "loss": 0.3541, "step": 20267 }, { "epoch": 2.94, "grad_norm": 8.22042179107666, "learning_rate": 2.0845179212468643e-09, "loss": 0.3182, "step": 20268 }, { "epoch": 2.94, "grad_norm": 8.022191047668457, "learning_rate": 2.0743157482538964e-09, "loss": 0.3491, "step": 20269 }, { "epoch": 2.94, "grad_norm": 8.598760604858398, "learning_rate": 2.064138576667651e-09, "loss": 0.2991, "step": 20270 }, { "epoch": 2.94, "grad_norm": 8.2935152053833, "learning_rate": 2.0539864067428136e-09, "loss": 0.3283, "step": 20271 }, { "epoch": 2.94, "grad_norm": 9.328401565551758, "learning_rate": 2.043859238733847e-09, "loss": 0.4009, "step": 20272 }, { "epoch": 2.94, "grad_norm": 7.921667575836182, "learning_rate": 2.0337570728945488e-09, "loss": 0.3528, "step": 20273 }, { "epoch": 2.94, "grad_norm": 8.948497772216797, "learning_rate": 2.0236799094779378e-09, "loss": 0.3326, "step": 20274 }, { "epoch": 2.94, "grad_norm": 8.293716430664062, "learning_rate": 2.0136277487364795e-09, "loss": 0.2998, "step": 20275 }, { "epoch": 2.94, "grad_norm": 8.665327072143555, "learning_rate": 2.003600590921972e-09, "loss": 0.4087, "step": 20276 }, { "epoch": 2.94, "grad_norm": 8.09980297088623, "learning_rate": 1.9935984362856595e-09, "loss": 0.3453, "step": 20277 }, { "epoch": 2.94, "grad_norm": 8.860794067382812, "learning_rate": 1.98362128507823e-09, "loss": 0.3037, "step": 20278 }, { "epoch": 2.94, "grad_norm": 9.51419448852539, "learning_rate": 1.973669137549594e-09, "loss": 0.3052, "step": 20279 }, { "epoch": 2.94, "grad_norm": 8.394272804260254, "learning_rate": 1.9637419939488863e-09, "loss": 0.3471, "step": 20280 }, { "epoch": 2.94, "grad_norm": 8.215147972106934, "learning_rate": 1.95383985452513e-09, "loss": 0.3637, "step": 20281 }, { "epoch": 2.94, "grad_norm": 8.7514009475708, "learning_rate": 1.9439627195262378e-09, "loss": 0.3141, "step": 20282 }, { "epoch": 2.94, "grad_norm": 8.270981788635254, "learning_rate": 1.934110589199678e-09, "loss": 0.2812, "step": 20283 }, { "epoch": 2.94, "grad_norm": 9.16844654083252, "learning_rate": 1.9242834637922534e-09, "loss": 0.3675, "step": 20284 }, { "epoch": 2.94, "grad_norm": 8.408989906311035, "learning_rate": 1.9144813435502116e-09, "loss": 0.305, "step": 20285 }, { "epoch": 2.94, "grad_norm": 8.138656616210938, "learning_rate": 1.904704228719134e-09, "loss": 0.3548, "step": 20286 }, { "epoch": 2.94, "grad_norm": 9.132957458496094, "learning_rate": 1.8949521195440464e-09, "loss": 0.3604, "step": 20287 }, { "epoch": 2.94, "grad_norm": 8.860642433166504, "learning_rate": 1.8852250162690873e-09, "loss": 0.3243, "step": 20288 }, { "epoch": 2.94, "grad_norm": 8.253986358642578, "learning_rate": 1.8755229191380616e-09, "loss": 0.3331, "step": 20289 }, { "epoch": 2.94, "grad_norm": 9.017207145690918, "learning_rate": 1.865845828393997e-09, "loss": 0.3808, "step": 20290 }, { "epoch": 2.94, "grad_norm": 9.597518920898438, "learning_rate": 1.8561937442794773e-09, "loss": 0.4205, "step": 20291 }, { "epoch": 2.94, "grad_norm": 9.631792068481445, "learning_rate": 1.846566667036087e-09, "loss": 0.3694, "step": 20292 }, { "epoch": 2.94, "grad_norm": 7.795841693878174, "learning_rate": 1.8369645969051884e-09, "loss": 0.3823, "step": 20293 }, { "epoch": 2.94, "grad_norm": 8.642792701721191, "learning_rate": 1.827387534127367e-09, "loss": 0.3807, "step": 20294 }, { "epoch": 2.94, "grad_norm": 9.49400520324707, "learning_rate": 1.8178354789424311e-09, "loss": 0.3726, "step": 20295 }, { "epoch": 2.94, "grad_norm": 8.302138328552246, "learning_rate": 1.8083084315897445e-09, "loss": 0.3884, "step": 20296 }, { "epoch": 2.95, "grad_norm": 8.868568420410156, "learning_rate": 1.7988063923080055e-09, "loss": 0.3343, "step": 20297 }, { "epoch": 2.95, "grad_norm": 9.134947776794434, "learning_rate": 1.7893293613352456e-09, "loss": 0.328, "step": 20298 }, { "epoch": 2.95, "grad_norm": 8.088889122009277, "learning_rate": 1.7798773389089417e-09, "loss": 0.3092, "step": 20299 }, { "epoch": 2.95, "grad_norm": 7.961977005004883, "learning_rate": 1.7704503252657931e-09, "loss": 0.3353, "step": 20300 }, { "epoch": 2.95, "grad_norm": 8.030213356018066, "learning_rate": 1.7610483206421668e-09, "loss": 0.3366, "step": 20301 }, { "epoch": 2.95, "grad_norm": 8.413995742797852, "learning_rate": 1.7516713252734293e-09, "loss": 0.3548, "step": 20302 }, { "epoch": 2.95, "grad_norm": 8.669967651367188, "learning_rate": 1.7423193393946157e-09, "loss": 0.3573, "step": 20303 }, { "epoch": 2.95, "grad_norm": 9.454817771911621, "learning_rate": 1.7329923632399823e-09, "loss": 0.3742, "step": 20304 }, { "epoch": 2.95, "grad_norm": 8.307744026184082, "learning_rate": 1.7236903970433426e-09, "loss": 0.3471, "step": 20305 }, { "epoch": 2.95, "grad_norm": 8.074591636657715, "learning_rate": 1.714413441037399e-09, "loss": 0.2899, "step": 20306 }, { "epoch": 2.95, "grad_norm": 8.045106887817383, "learning_rate": 1.7051614954549653e-09, "loss": 0.3702, "step": 20307 }, { "epoch": 2.95, "grad_norm": 8.07528305053711, "learning_rate": 1.6959345605275233e-09, "loss": 0.3397, "step": 20308 }, { "epoch": 2.95, "grad_norm": 9.332582473754883, "learning_rate": 1.6867326364864432e-09, "loss": 0.333, "step": 20309 }, { "epoch": 2.95, "grad_norm": 8.208779335021973, "learning_rate": 1.6775557235620963e-09, "loss": 0.3336, "step": 20310 }, { "epoch": 2.95, "grad_norm": 8.144163131713867, "learning_rate": 1.6684038219845209e-09, "loss": 0.3198, "step": 20311 }, { "epoch": 2.95, "grad_norm": 7.6117448806762695, "learning_rate": 1.659276931983089e-09, "loss": 0.3161, "step": 20312 }, { "epoch": 2.95, "grad_norm": 7.758651256561279, "learning_rate": 1.6501750537861736e-09, "loss": 0.3633, "step": 20313 }, { "epoch": 2.95, "grad_norm": 8.805753707885742, "learning_rate": 1.6410981876221474e-09, "loss": 0.3361, "step": 20314 }, { "epoch": 2.95, "grad_norm": 8.62786865234375, "learning_rate": 1.6320463337181623e-09, "loss": 0.3437, "step": 20315 }, { "epoch": 2.95, "grad_norm": 8.483732223510742, "learning_rate": 1.6230194923010366e-09, "loss": 0.3372, "step": 20316 }, { "epoch": 2.95, "grad_norm": 9.183968544006348, "learning_rate": 1.6140176635969226e-09, "loss": 0.3772, "step": 20317 }, { "epoch": 2.95, "grad_norm": 8.043237686157227, "learning_rate": 1.6050408478315292e-09, "loss": 0.3353, "step": 20318 }, { "epoch": 2.95, "grad_norm": 9.310556411743164, "learning_rate": 1.596089045229565e-09, "loss": 0.3279, "step": 20319 }, { "epoch": 2.95, "grad_norm": 8.932106018066406, "learning_rate": 1.5871622560152953e-09, "loss": 0.4005, "step": 20320 }, { "epoch": 2.95, "grad_norm": 8.657227516174316, "learning_rate": 1.57826048041243e-09, "loss": 0.3132, "step": 20321 }, { "epoch": 2.95, "grad_norm": 9.701165199279785, "learning_rate": 1.5693837186440128e-09, "loss": 0.3707, "step": 20322 }, { "epoch": 2.95, "grad_norm": 9.88326644897461, "learning_rate": 1.5605319709324216e-09, "loss": 0.3031, "step": 20323 }, { "epoch": 2.95, "grad_norm": 8.59538745880127, "learning_rate": 1.5517052374994788e-09, "loss": 0.3532, "step": 20324 }, { "epoch": 2.95, "grad_norm": 7.974210262298584, "learning_rate": 1.5429035185661187e-09, "loss": 0.3493, "step": 20325 }, { "epoch": 2.95, "grad_norm": 8.385544776916504, "learning_rate": 1.534126814353054e-09, "loss": 0.3134, "step": 20326 }, { "epoch": 2.95, "grad_norm": 8.345699310302734, "learning_rate": 1.5253751250801084e-09, "loss": 0.3382, "step": 20327 }, { "epoch": 2.95, "grad_norm": 8.163054466247559, "learning_rate": 1.516648450966551e-09, "loss": 0.3129, "step": 20328 }, { "epoch": 2.95, "grad_norm": 7.73489236831665, "learning_rate": 1.5079467922309852e-09, "loss": 0.3258, "step": 20329 }, { "epoch": 2.95, "grad_norm": 8.208575248718262, "learning_rate": 1.4992701490913472e-09, "loss": 0.3543, "step": 20330 }, { "epoch": 2.95, "grad_norm": 9.298090934753418, "learning_rate": 1.4906185217652413e-09, "loss": 0.391, "step": 20331 }, { "epoch": 2.95, "grad_norm": 8.518342018127441, "learning_rate": 1.4819919104691603e-09, "loss": 0.3719, "step": 20332 }, { "epoch": 2.95, "grad_norm": 8.450894355773926, "learning_rate": 1.4733903154194871e-09, "loss": 0.3663, "step": 20333 }, { "epoch": 2.95, "grad_norm": 9.831704139709473, "learning_rate": 1.464813736831494e-09, "loss": 0.4008, "step": 20334 }, { "epoch": 2.95, "grad_norm": 8.440312385559082, "learning_rate": 1.4562621749201198e-09, "loss": 0.3605, "step": 20335 }, { "epoch": 2.95, "grad_norm": 7.3289875984191895, "learning_rate": 1.447735629899638e-09, "loss": 0.3086, "step": 20336 }, { "epoch": 2.95, "grad_norm": 8.558704376220703, "learning_rate": 1.4392341019837661e-09, "loss": 0.3316, "step": 20337 }, { "epoch": 2.95, "grad_norm": 8.996728897094727, "learning_rate": 1.4307575913853343e-09, "loss": 0.3613, "step": 20338 }, { "epoch": 2.95, "grad_norm": 7.720022201538086, "learning_rate": 1.4223060983167278e-09, "loss": 0.3357, "step": 20339 }, { "epoch": 2.95, "grad_norm": 8.789804458618164, "learning_rate": 1.4138796229897776e-09, "loss": 0.3143, "step": 20340 }, { "epoch": 2.95, "grad_norm": 10.0909423828125, "learning_rate": 1.4054781656154257e-09, "loss": 0.3658, "step": 20341 }, { "epoch": 2.95, "grad_norm": 10.09698486328125, "learning_rate": 1.3971017264043928e-09, "loss": 0.3752, "step": 20342 }, { "epoch": 2.95, "grad_norm": 7.9791789054870605, "learning_rate": 1.3887503055663995e-09, "loss": 0.3395, "step": 20343 }, { "epoch": 2.95, "grad_norm": 8.915908813476562, "learning_rate": 1.3804239033107235e-09, "loss": 0.368, "step": 20344 }, { "epoch": 2.95, "grad_norm": 9.210179328918457, "learning_rate": 1.372122519845864e-09, "loss": 0.3626, "step": 20345 }, { "epoch": 2.95, "grad_norm": 8.449223518371582, "learning_rate": 1.3638461553798775e-09, "loss": 0.3669, "step": 20346 }, { "epoch": 2.95, "grad_norm": 9.037012100219727, "learning_rate": 1.3555948101201531e-09, "loss": 0.3477, "step": 20347 }, { "epoch": 2.95, "grad_norm": 7.774250030517578, "learning_rate": 1.3473684842734145e-09, "loss": 0.3522, "step": 20348 }, { "epoch": 2.95, "grad_norm": 8.008352279663086, "learning_rate": 1.3391671780457193e-09, "loss": 0.3323, "step": 20349 }, { "epoch": 2.95, "grad_norm": 7.9715118408203125, "learning_rate": 1.3309908916424583e-09, "loss": 0.3246, "step": 20350 }, { "epoch": 2.95, "grad_norm": 8.391308784484863, "learning_rate": 1.3228396252685792e-09, "loss": 0.3725, "step": 20351 }, { "epoch": 2.95, "grad_norm": 9.522761344909668, "learning_rate": 1.3147133791282516e-09, "loss": 0.3458, "step": 20352 }, { "epoch": 2.95, "grad_norm": 8.58193302154541, "learning_rate": 1.3066121534250906e-09, "loss": 0.2949, "step": 20353 }, { "epoch": 2.95, "grad_norm": 9.368690490722656, "learning_rate": 1.298535948362156e-09, "loss": 0.3418, "step": 20354 }, { "epoch": 2.95, "grad_norm": 7.627424716949463, "learning_rate": 1.2904847641416195e-09, "loss": 0.3175, "step": 20355 }, { "epoch": 2.95, "grad_norm": 8.728504180908203, "learning_rate": 1.2824586009652082e-09, "loss": 0.341, "step": 20356 }, { "epoch": 2.95, "grad_norm": 8.621435165405273, "learning_rate": 1.274457459034095e-09, "loss": 0.3563, "step": 20357 }, { "epoch": 2.95, "grad_norm": 8.284360885620117, "learning_rate": 1.266481338548675e-09, "loss": 0.3703, "step": 20358 }, { "epoch": 2.95, "grad_norm": 8.890101432800293, "learning_rate": 1.2585302397087882e-09, "loss": 0.3687, "step": 20359 }, { "epoch": 2.95, "grad_norm": 8.826192855834961, "learning_rate": 1.2506041627136087e-09, "loss": 0.3817, "step": 20360 }, { "epoch": 2.95, "grad_norm": 8.18100643157959, "learning_rate": 1.2427031077618667e-09, "loss": 0.4191, "step": 20361 }, { "epoch": 2.95, "grad_norm": 8.79273509979248, "learning_rate": 1.2348270750512923e-09, "loss": 0.3522, "step": 20362 }, { "epoch": 2.95, "grad_norm": 8.349198341369629, "learning_rate": 1.2269760647792838e-09, "loss": 0.3368, "step": 20363 }, { "epoch": 2.95, "grad_norm": 6.759281635284424, "learning_rate": 1.2191500771426833e-09, "loss": 0.32, "step": 20364 }, { "epoch": 2.95, "grad_norm": 8.644436836242676, "learning_rate": 1.2113491123372234e-09, "loss": 0.3494, "step": 20365 }, { "epoch": 2.96, "grad_norm": 9.557619094848633, "learning_rate": 1.2035731705587471e-09, "loss": 0.3379, "step": 20366 }, { "epoch": 2.96, "grad_norm": 8.990396499633789, "learning_rate": 1.1958222520017657e-09, "loss": 0.3857, "step": 20367 }, { "epoch": 2.96, "grad_norm": 7.543969631195068, "learning_rate": 1.1880963568605683e-09, "loss": 0.2949, "step": 20368 }, { "epoch": 2.96, "grad_norm": 10.04751205444336, "learning_rate": 1.1803954853287779e-09, "loss": 0.4084, "step": 20369 }, { "epoch": 2.96, "grad_norm": 9.41248607635498, "learning_rate": 1.17271963759924e-09, "loss": 0.3813, "step": 20370 }, { "epoch": 2.96, "grad_norm": 8.756355285644531, "learning_rate": 1.165068813864356e-09, "loss": 0.4146, "step": 20371 }, { "epoch": 2.96, "grad_norm": 8.455885887145996, "learning_rate": 1.15744301431564e-09, "loss": 0.3221, "step": 20372 }, { "epoch": 2.96, "grad_norm": 7.951450347900391, "learning_rate": 1.149842239144272e-09, "loss": 0.3732, "step": 20373 }, { "epoch": 2.96, "grad_norm": 9.138707160949707, "learning_rate": 1.1422664885406552e-09, "loss": 0.3516, "step": 20374 }, { "epoch": 2.96, "grad_norm": 9.01846694946289, "learning_rate": 1.134715762694527e-09, "loss": 0.3869, "step": 20375 }, { "epoch": 2.96, "grad_norm": 8.424893379211426, "learning_rate": 1.1271900617951802e-09, "loss": 0.3063, "step": 20376 }, { "epoch": 2.96, "grad_norm": 8.84594440460205, "learning_rate": 1.1196893860310198e-09, "loss": 0.3183, "step": 20377 }, { "epoch": 2.96, "grad_norm": 8.668008804321289, "learning_rate": 1.1122137355900062e-09, "loss": 0.3501, "step": 20378 }, { "epoch": 2.96, "grad_norm": 9.050615310668945, "learning_rate": 1.1047631106595457e-09, "loss": 0.3705, "step": 20379 }, { "epoch": 2.96, "grad_norm": 7.763800621032715, "learning_rate": 1.097337511426266e-09, "loss": 0.3276, "step": 20380 }, { "epoch": 2.96, "grad_norm": 9.421998977661133, "learning_rate": 1.0899369380760193e-09, "loss": 0.3835, "step": 20381 }, { "epoch": 2.96, "grad_norm": 10.398332595825195, "learning_rate": 1.0825613907944342e-09, "loss": 0.4179, "step": 20382 }, { "epoch": 2.96, "grad_norm": 8.357649803161621, "learning_rate": 1.0752108697661412e-09, "loss": 0.3245, "step": 20383 }, { "epoch": 2.96, "grad_norm": 10.033045768737793, "learning_rate": 1.0678853751753258e-09, "loss": 0.3662, "step": 20384 }, { "epoch": 2.96, "grad_norm": 8.752909660339355, "learning_rate": 1.0605849072056195e-09, "loss": 0.3924, "step": 20385 }, { "epoch": 2.96, "grad_norm": 8.837754249572754, "learning_rate": 1.0533094660398756e-09, "loss": 0.3014, "step": 20386 }, { "epoch": 2.96, "grad_norm": 7.5420708656311035, "learning_rate": 1.046059051860282e-09, "loss": 0.279, "step": 20387 }, { "epoch": 2.96, "grad_norm": 9.880562782287598, "learning_rate": 1.0388336648485818e-09, "loss": 0.4155, "step": 20388 }, { "epoch": 2.96, "grad_norm": 8.333602905273438, "learning_rate": 1.0316333051857418e-09, "loss": 0.3225, "step": 20389 }, { "epoch": 2.96, "grad_norm": 8.787501335144043, "learning_rate": 1.024457973052173e-09, "loss": 0.3548, "step": 20390 }, { "epoch": 2.96, "grad_norm": 7.547834396362305, "learning_rate": 1.0173076686276205e-09, "loss": 0.3608, "step": 20391 }, { "epoch": 2.96, "grad_norm": 8.483510971069336, "learning_rate": 1.0101823920912745e-09, "loss": 0.3052, "step": 20392 }, { "epoch": 2.96, "grad_norm": 8.602851867675781, "learning_rate": 1.0030821436216586e-09, "loss": 0.3564, "step": 20393 }, { "epoch": 2.96, "grad_norm": 8.941173553466797, "learning_rate": 9.960069233966305e-10, "loss": 0.3508, "step": 20394 }, { "epoch": 2.96, "grad_norm": 8.481785774230957, "learning_rate": 9.889567315933823e-10, "loss": 0.3519, "step": 20395 }, { "epoch": 2.96, "grad_norm": 9.208219528198242, "learning_rate": 9.8193156838855e-10, "loss": 0.3314, "step": 20396 }, { "epoch": 2.96, "grad_norm": 8.653590202331543, "learning_rate": 9.749314339583258e-10, "loss": 0.3647, "step": 20397 }, { "epoch": 2.96, "grad_norm": 9.893413543701172, "learning_rate": 9.679563284779035e-10, "loss": 0.3872, "step": 20398 }, { "epoch": 2.96, "grad_norm": 8.617663383483887, "learning_rate": 9.61006252122032e-10, "loss": 0.3223, "step": 20399 }, { "epoch": 2.96, "grad_norm": 9.143473625183105, "learning_rate": 9.54081205064905e-10, "loss": 0.2945, "step": 20400 }, { "epoch": 2.96, "grad_norm": 9.535933494567871, "learning_rate": 9.471811874800507e-10, "loss": 0.3459, "step": 20401 }, { "epoch": 2.96, "grad_norm": 7.768964767456055, "learning_rate": 9.403061995401085e-10, "loss": 0.3157, "step": 20402 }, { "epoch": 2.96, "grad_norm": 8.071183204650879, "learning_rate": 9.334562414177183e-10, "loss": 0.3966, "step": 20403 }, { "epoch": 2.96, "grad_norm": 8.295730590820312, "learning_rate": 9.266313132840764e-10, "loss": 0.3555, "step": 20404 }, { "epoch": 2.96, "grad_norm": 8.773404121398926, "learning_rate": 9.198314153104902e-10, "loss": 0.3467, "step": 20405 }, { "epoch": 2.96, "grad_norm": 9.144213676452637, "learning_rate": 9.13056547667157e-10, "loss": 0.3374, "step": 20406 }, { "epoch": 2.96, "grad_norm": 9.013641357421875, "learning_rate": 9.063067105238298e-10, "loss": 0.3958, "step": 20407 }, { "epoch": 2.96, "grad_norm": 8.720635414123535, "learning_rate": 8.995819040495955e-10, "loss": 0.3447, "step": 20408 }, { "epoch": 2.96, "grad_norm": 7.803156852722168, "learning_rate": 8.928821284130972e-10, "loss": 0.3048, "step": 20409 }, { "epoch": 2.96, "grad_norm": 8.114737510681152, "learning_rate": 8.862073837819783e-10, "loss": 0.2788, "step": 20410 }, { "epoch": 2.96, "grad_norm": 7.992167949676514, "learning_rate": 8.795576703235497e-10, "loss": 0.3219, "step": 20411 }, { "epoch": 2.96, "grad_norm": 9.157294273376465, "learning_rate": 8.729329882043446e-10, "loss": 0.3492, "step": 20412 }, { "epoch": 2.96, "grad_norm": 8.566420555114746, "learning_rate": 8.663333375904525e-10, "loss": 0.2805, "step": 20413 }, { "epoch": 2.96, "grad_norm": 8.014892578125, "learning_rate": 8.597587186471855e-10, "loss": 0.3158, "step": 20414 }, { "epoch": 2.96, "grad_norm": 9.090587615966797, "learning_rate": 8.532091315391898e-10, "loss": 0.3638, "step": 20415 }, { "epoch": 2.96, "grad_norm": 8.054884910583496, "learning_rate": 8.466845764305564e-10, "loss": 0.3197, "step": 20416 }, { "epoch": 2.96, "grad_norm": 8.431817054748535, "learning_rate": 8.40185053484821e-10, "loss": 0.3292, "step": 20417 }, { "epoch": 2.96, "grad_norm": 8.202803611755371, "learning_rate": 8.337105628648533e-10, "loss": 0.3063, "step": 20418 }, { "epoch": 2.96, "grad_norm": 9.748504638671875, "learning_rate": 8.272611047327461e-10, "loss": 0.3989, "step": 20419 }, { "epoch": 2.96, "grad_norm": 8.74238395690918, "learning_rate": 8.208366792501476e-10, "loss": 0.3321, "step": 20420 }, { "epoch": 2.96, "grad_norm": 8.973185539245605, "learning_rate": 8.144372865779293e-10, "loss": 0.3375, "step": 20421 }, { "epoch": 2.96, "grad_norm": 8.35180377960205, "learning_rate": 8.080629268765183e-10, "loss": 0.3685, "step": 20422 }, { "epoch": 2.96, "grad_norm": 9.3182954788208, "learning_rate": 8.017136003055647e-10, "loss": 0.3298, "step": 20423 }, { "epoch": 2.96, "grad_norm": 9.149795532226562, "learning_rate": 7.953893070241635e-10, "loss": 0.3558, "step": 20424 }, { "epoch": 2.96, "grad_norm": 10.956080436706543, "learning_rate": 7.890900471907436e-10, "loss": 0.348, "step": 20425 }, { "epoch": 2.96, "grad_norm": 8.412978172302246, "learning_rate": 7.828158209630676e-10, "loss": 0.3108, "step": 20426 }, { "epoch": 2.96, "grad_norm": 8.627310752868652, "learning_rate": 7.76566628498454e-10, "loss": 0.3434, "step": 20427 }, { "epoch": 2.96, "grad_norm": 9.136716842651367, "learning_rate": 7.703424699534444e-10, "loss": 0.3449, "step": 20428 }, { "epoch": 2.96, "grad_norm": 7.655969619750977, "learning_rate": 7.641433454838031e-10, "loss": 0.3183, "step": 20429 }, { "epoch": 2.96, "grad_norm": 8.640201568603516, "learning_rate": 7.579692552450722e-10, "loss": 0.3902, "step": 20430 }, { "epoch": 2.96, "grad_norm": 9.475935935974121, "learning_rate": 7.51820199391684e-10, "loss": 0.3739, "step": 20431 }, { "epoch": 2.96, "grad_norm": 8.499361038208008, "learning_rate": 7.456961780779592e-10, "loss": 0.3305, "step": 20432 }, { "epoch": 2.96, "grad_norm": 8.94946575164795, "learning_rate": 7.395971914571086e-10, "loss": 0.3405, "step": 20433 }, { "epoch": 2.96, "grad_norm": 8.377368927001953, "learning_rate": 7.335232396821211e-10, "loss": 0.3432, "step": 20434 }, { "epoch": 2.97, "grad_norm": 10.005743980407715, "learning_rate": 7.274743229049862e-10, "loss": 0.451, "step": 20435 }, { "epoch": 2.97, "grad_norm": 8.195070266723633, "learning_rate": 7.214504412774714e-10, "loss": 0.3136, "step": 20436 }, { "epoch": 2.97, "grad_norm": 8.186529159545898, "learning_rate": 7.154515949502337e-10, "loss": 0.3255, "step": 20437 }, { "epoch": 2.97, "grad_norm": 10.504688262939453, "learning_rate": 7.094777840737087e-10, "loss": 0.3884, "step": 20438 }, { "epoch": 2.97, "grad_norm": 9.59786605834961, "learning_rate": 7.035290087975543e-10, "loss": 0.3813, "step": 20439 }, { "epoch": 2.97, "grad_norm": 8.885513305664062, "learning_rate": 6.976052692707624e-10, "loss": 0.3405, "step": 20440 }, { "epoch": 2.97, "grad_norm": 9.584746360778809, "learning_rate": 6.917065656417698e-10, "loss": 0.3676, "step": 20441 }, { "epoch": 2.97, "grad_norm": 8.90230655670166, "learning_rate": 6.858328980584582e-10, "loss": 0.3747, "step": 20442 }, { "epoch": 2.97, "grad_norm": 8.965782165527344, "learning_rate": 6.799842666678213e-10, "loss": 0.322, "step": 20443 }, { "epoch": 2.97, "grad_norm": 7.504408359527588, "learning_rate": 6.741606716164083e-10, "loss": 0.3593, "step": 20444 }, { "epoch": 2.97, "grad_norm": 8.169418334960938, "learning_rate": 6.683621130502138e-10, "loss": 0.3041, "step": 20445 }, { "epoch": 2.97, "grad_norm": 13.383014678955078, "learning_rate": 6.625885911145657e-10, "loss": 0.5042, "step": 20446 }, { "epoch": 2.97, "grad_norm": 9.332229614257812, "learning_rate": 6.568401059539042e-10, "loss": 0.3352, "step": 20447 }, { "epoch": 2.97, "grad_norm": 8.139543533325195, "learning_rate": 6.511166577123362e-10, "loss": 0.3335, "step": 20448 }, { "epoch": 2.97, "grad_norm": 7.843792915344238, "learning_rate": 6.454182465333025e-10, "loss": 0.3024, "step": 20449 }, { "epoch": 2.97, "grad_norm": 10.042109489440918, "learning_rate": 6.397448725594668e-10, "loss": 0.3576, "step": 20450 }, { "epoch": 2.97, "grad_norm": 9.251042366027832, "learning_rate": 6.340965359331596e-10, "loss": 0.393, "step": 20451 }, { "epoch": 2.97, "grad_norm": 10.380797386169434, "learning_rate": 6.284732367956014e-10, "loss": 0.3885, "step": 20452 }, { "epoch": 2.97, "grad_norm": 8.203514099121094, "learning_rate": 6.228749752880125e-10, "loss": 0.3861, "step": 20453 }, { "epoch": 2.97, "grad_norm": 9.109474182128906, "learning_rate": 6.17301751550392e-10, "loss": 0.3207, "step": 20454 }, { "epoch": 2.97, "grad_norm": 9.389816284179688, "learning_rate": 6.11753565722406e-10, "loss": 0.3595, "step": 20455 }, { "epoch": 2.97, "grad_norm": 8.36532974243164, "learning_rate": 6.062304179431655e-10, "loss": 0.3163, "step": 20456 }, { "epoch": 2.97, "grad_norm": 7.950114727020264, "learning_rate": 6.007323083510041e-10, "loss": 0.297, "step": 20457 }, { "epoch": 2.97, "grad_norm": 8.430242538452148, "learning_rate": 5.952592370834786e-10, "loss": 0.2879, "step": 20458 }, { "epoch": 2.97, "grad_norm": 8.365711212158203, "learning_rate": 5.898112042780345e-10, "loss": 0.3375, "step": 20459 }, { "epoch": 2.97, "grad_norm": 9.40341854095459, "learning_rate": 5.843882100710073e-10, "loss": 0.4065, "step": 20460 }, { "epoch": 2.97, "grad_norm": 10.43498706817627, "learning_rate": 5.789902545981772e-10, "loss": 0.3746, "step": 20461 }, { "epoch": 2.97, "grad_norm": 9.245183944702148, "learning_rate": 5.736173379948806e-10, "loss": 0.3681, "step": 20462 }, { "epoch": 2.97, "grad_norm": 8.506335258483887, "learning_rate": 5.682694603956762e-10, "loss": 0.3203, "step": 20463 }, { "epoch": 2.97, "grad_norm": 9.22448444366455, "learning_rate": 5.62946621934568e-10, "loss": 0.3329, "step": 20464 }, { "epoch": 2.97, "grad_norm": 9.061264038085938, "learning_rate": 5.576488227450049e-10, "loss": 0.3714, "step": 20465 }, { "epoch": 2.97, "grad_norm": 7.881269454956055, "learning_rate": 5.523760629595475e-10, "loss": 0.3459, "step": 20466 }, { "epoch": 2.97, "grad_norm": 7.791810035705566, "learning_rate": 5.471283427104234e-10, "loss": 0.3405, "step": 20467 }, { "epoch": 2.97, "grad_norm": 8.611831665039062, "learning_rate": 5.419056621290829e-10, "loss": 0.3491, "step": 20468 }, { "epoch": 2.97, "grad_norm": 8.606966018676758, "learning_rate": 5.367080213461994e-10, "loss": 0.3679, "step": 20469 }, { "epoch": 2.97, "grad_norm": 8.816389083862305, "learning_rate": 5.31535420492335e-10, "loss": 0.3519, "step": 20470 }, { "epoch": 2.97, "grad_norm": 8.182591438293457, "learning_rate": 5.263878596967198e-10, "loss": 0.3368, "step": 20471 }, { "epoch": 2.97, "grad_norm": 9.165231704711914, "learning_rate": 5.212653390885835e-10, "loss": 0.414, "step": 20472 }, { "epoch": 2.97, "grad_norm": 9.376020431518555, "learning_rate": 5.161678587961571e-10, "loss": 0.4258, "step": 20473 }, { "epoch": 2.97, "grad_norm": 7.155663013458252, "learning_rate": 5.110954189471161e-10, "loss": 0.3067, "step": 20474 }, { "epoch": 2.97, "grad_norm": 9.501423835754395, "learning_rate": 5.060480196685812e-10, "loss": 0.361, "step": 20475 }, { "epoch": 2.97, "grad_norm": 8.284677505493164, "learning_rate": 5.010256610870067e-10, "loss": 0.3775, "step": 20476 }, { "epoch": 2.97, "grad_norm": 8.69611644744873, "learning_rate": 4.960283433282919e-10, "loss": 0.358, "step": 20477 }, { "epoch": 2.97, "grad_norm": 8.855976104736328, "learning_rate": 4.91056066517448e-10, "loss": 0.4146, "step": 20478 }, { "epoch": 2.97, "grad_norm": 8.80494499206543, "learning_rate": 4.861088307792637e-10, "loss": 0.3123, "step": 20479 }, { "epoch": 2.97, "grad_norm": 9.42198371887207, "learning_rate": 4.811866362375294e-10, "loss": 0.3521, "step": 20480 }, { "epoch": 2.97, "grad_norm": 8.274053573608398, "learning_rate": 4.762894830155906e-10, "loss": 0.3245, "step": 20481 }, { "epoch": 2.97, "grad_norm": 10.474156379699707, "learning_rate": 4.71417371236238e-10, "loss": 0.4342, "step": 20482 }, { "epoch": 2.97, "grad_norm": 8.182865142822266, "learning_rate": 4.665703010213739e-10, "loss": 0.2873, "step": 20483 }, { "epoch": 2.97, "grad_norm": 10.041281700134277, "learning_rate": 4.617482724925681e-10, "loss": 0.3852, "step": 20484 }, { "epoch": 2.97, "grad_norm": 8.80505657196045, "learning_rate": 4.5695128577061257e-10, "loss": 0.3285, "step": 20485 }, { "epoch": 2.97, "grad_norm": 9.227490425109863, "learning_rate": 4.5217934097563357e-10, "loss": 0.3878, "step": 20486 }, { "epoch": 2.97, "grad_norm": 11.712845802307129, "learning_rate": 4.474324382272021e-10, "loss": 0.4103, "step": 20487 }, { "epoch": 2.97, "grad_norm": 8.253951072692871, "learning_rate": 4.4271057764422303e-10, "loss": 0.3388, "step": 20488 }, { "epoch": 2.97, "grad_norm": 8.323445320129395, "learning_rate": 4.3801375934504617e-10, "loss": 0.339, "step": 20489 }, { "epoch": 2.97, "grad_norm": 7.728893280029297, "learning_rate": 4.333419834473551e-10, "loss": 0.3448, "step": 20490 }, { "epoch": 2.97, "grad_norm": 9.320971488952637, "learning_rate": 4.286952500680563e-10, "loss": 0.3583, "step": 20491 }, { "epoch": 2.97, "grad_norm": 9.185515403747559, "learning_rate": 4.2407355932372324e-10, "loss": 0.3405, "step": 20492 }, { "epoch": 2.97, "grad_norm": 9.299077987670898, "learning_rate": 4.1947691133015215e-10, "loss": 0.3891, "step": 20493 }, { "epoch": 2.97, "grad_norm": 10.06300163269043, "learning_rate": 4.1490530620236217e-10, "loss": 0.4234, "step": 20494 }, { "epoch": 2.97, "grad_norm": 8.135677337646484, "learning_rate": 4.103587440549283e-10, "loss": 0.3525, "step": 20495 }, { "epoch": 2.97, "grad_norm": 9.03748607635498, "learning_rate": 4.0583722500187047e-10, "loss": 0.3875, "step": 20496 }, { "epoch": 2.97, "grad_norm": 9.331940650939941, "learning_rate": 4.013407491563203e-10, "loss": 0.3595, "step": 20497 }, { "epoch": 2.97, "grad_norm": 8.106559753417969, "learning_rate": 3.968693166310766e-10, "loss": 0.3168, "step": 20498 }, { "epoch": 2.97, "grad_norm": 8.543380737304688, "learning_rate": 3.9242292753804973e-10, "loss": 0.3208, "step": 20499 }, { "epoch": 2.97, "grad_norm": 8.685665130615234, "learning_rate": 3.8800158198870614e-10, "loss": 0.3678, "step": 20500 }, { "epoch": 2.97, "grad_norm": 8.692227363586426, "learning_rate": 3.8360528009373507e-10, "loss": 0.299, "step": 20501 }, { "epoch": 2.97, "grad_norm": 7.4677324295043945, "learning_rate": 3.792340219633816e-10, "loss": 0.3166, "step": 20502 }, { "epoch": 2.97, "grad_norm": 8.655234336853027, "learning_rate": 3.748878077070028e-10, "loss": 0.2961, "step": 20503 }, { "epoch": 2.98, "grad_norm": 8.895448684692383, "learning_rate": 3.7056663743373353e-10, "loss": 0.3192, "step": 20504 }, { "epoch": 2.98, "grad_norm": 8.843338012695312, "learning_rate": 3.6627051125159847e-10, "loss": 0.3681, "step": 20505 }, { "epoch": 2.98, "grad_norm": 7.203760147094727, "learning_rate": 3.619994292682893e-10, "loss": 0.3175, "step": 20506 }, { "epoch": 2.98, "grad_norm": 7.893279552459717, "learning_rate": 3.577533915909425e-10, "loss": 0.3112, "step": 20507 }, { "epoch": 2.98, "grad_norm": 8.591768264770508, "learning_rate": 3.535323983258065e-10, "loss": 0.3473, "step": 20508 }, { "epoch": 2.98, "grad_norm": 8.83511734008789, "learning_rate": 3.4933644957857444e-10, "loss": 0.3671, "step": 20509 }, { "epoch": 2.98, "grad_norm": 10.226887702941895, "learning_rate": 3.4516554545460654e-10, "loss": 0.3835, "step": 20510 }, { "epoch": 2.98, "grad_norm": 8.735607147216797, "learning_rate": 3.4101968605815267e-10, "loss": 0.3809, "step": 20511 }, { "epoch": 2.98, "grad_norm": 9.176313400268555, "learning_rate": 3.368988714931298e-10, "loss": 0.3985, "step": 20512 }, { "epoch": 2.98, "grad_norm": 7.490328788757324, "learning_rate": 3.328031018628996e-10, "loss": 0.3321, "step": 20513 }, { "epoch": 2.98, "grad_norm": 9.958573341369629, "learning_rate": 3.287323772700468e-10, "loss": 0.3876, "step": 20514 }, { "epoch": 2.98, "grad_norm": 8.865911483764648, "learning_rate": 3.246866978163787e-10, "loss": 0.3523, "step": 20515 }, { "epoch": 2.98, "grad_norm": 8.121282577514648, "learning_rate": 3.2066606360348083e-10, "loss": 0.3564, "step": 20516 }, { "epoch": 2.98, "grad_norm": 9.602656364440918, "learning_rate": 3.166704747319393e-10, "loss": 0.3486, "step": 20517 }, { "epoch": 2.98, "grad_norm": 7.992827892303467, "learning_rate": 3.126999313018963e-10, "loss": 0.3521, "step": 20518 }, { "epoch": 2.98, "grad_norm": 9.705843925476074, "learning_rate": 3.087544334129388e-10, "loss": 0.3588, "step": 20519 }, { "epoch": 2.98, "grad_norm": 8.07427978515625, "learning_rate": 3.0483398116365465e-10, "loss": 0.3379, "step": 20520 }, { "epoch": 2.98, "grad_norm": 11.464340209960938, "learning_rate": 3.009385746525206e-10, "loss": 0.3418, "step": 20521 }, { "epoch": 2.98, "grad_norm": 9.408897399902344, "learning_rate": 2.970682139770142e-10, "loss": 0.3765, "step": 20522 }, { "epoch": 2.98, "grad_norm": 9.027198791503906, "learning_rate": 2.93222899234169e-10, "loss": 0.322, "step": 20523 }, { "epoch": 2.98, "grad_norm": 9.080506324768066, "learning_rate": 2.8940263052013027e-10, "loss": 0.4348, "step": 20524 }, { "epoch": 2.98, "grad_norm": 8.611135482788086, "learning_rate": 2.856074079309323e-10, "loss": 0.3306, "step": 20525 }, { "epoch": 2.98, "grad_norm": 9.243762969970703, "learning_rate": 2.8183723156127714e-10, "loss": 0.3529, "step": 20526 }, { "epoch": 2.98, "grad_norm": 8.020380020141602, "learning_rate": 2.7809210150597786e-10, "loss": 0.3439, "step": 20527 }, { "epoch": 2.98, "grad_norm": 9.359487533569336, "learning_rate": 2.7437201785862617e-10, "loss": 0.3548, "step": 20528 }, { "epoch": 2.98, "grad_norm": 9.454849243164062, "learning_rate": 2.7067698071259193e-10, "loss": 0.3659, "step": 20529 }, { "epoch": 2.98, "grad_norm": 9.356199264526367, "learning_rate": 2.6700699016024564e-10, "loss": 0.355, "step": 20530 }, { "epoch": 2.98, "grad_norm": 9.645672798156738, "learning_rate": 2.633620462936248e-10, "loss": 0.4018, "step": 20531 }, { "epoch": 2.98, "grad_norm": 10.09472370147705, "learning_rate": 2.5974214920421176e-10, "loss": 0.4126, "step": 20532 }, { "epoch": 2.98, "grad_norm": 9.011860847473145, "learning_rate": 2.5614729898237874e-10, "loss": 0.3778, "step": 20533 }, { "epoch": 2.98, "grad_norm": 8.65742015838623, "learning_rate": 2.5257749571849785e-10, "loss": 0.3389, "step": 20534 }, { "epoch": 2.98, "grad_norm": 8.609689712524414, "learning_rate": 2.49032739501831e-10, "loss": 0.344, "step": 20535 }, { "epoch": 2.98, "grad_norm": 8.15746784210205, "learning_rate": 2.455130304211961e-10, "loss": 0.284, "step": 20536 }, { "epoch": 2.98, "grad_norm": 8.749239921569824, "learning_rate": 2.420183685648558e-10, "loss": 0.3909, "step": 20537 }, { "epoch": 2.98, "grad_norm": 10.077247619628906, "learning_rate": 2.385487540202957e-10, "loss": 0.4401, "step": 20538 }, { "epoch": 2.98, "grad_norm": 8.105652809143066, "learning_rate": 2.351041868743353e-10, "loss": 0.3356, "step": 20539 }, { "epoch": 2.98, "grad_norm": 8.164441108703613, "learning_rate": 2.3168466721357194e-10, "loss": 0.3613, "step": 20540 }, { "epoch": 2.98, "grad_norm": 8.712242126464844, "learning_rate": 2.2829019512338178e-10, "loss": 0.365, "step": 20541 }, { "epoch": 2.98, "grad_norm": 8.0545654296875, "learning_rate": 2.249207706889189e-10, "loss": 0.3723, "step": 20542 }, { "epoch": 2.98, "grad_norm": 8.633123397827148, "learning_rate": 2.2157639399467133e-10, "loss": 0.3721, "step": 20543 }, { "epoch": 2.98, "grad_norm": 7.3011064529418945, "learning_rate": 2.1825706512434984e-10, "loss": 0.2925, "step": 20544 }, { "epoch": 2.98, "grad_norm": 8.113778114318848, "learning_rate": 2.1496278416111013e-10, "loss": 0.3296, "step": 20545 }, { "epoch": 2.98, "grad_norm": 9.214080810546875, "learning_rate": 2.116935511874418e-10, "loss": 0.373, "step": 20546 }, { "epoch": 2.98, "grad_norm": 11.626545906066895, "learning_rate": 2.0844936628539034e-10, "loss": 0.3574, "step": 20547 }, { "epoch": 2.98, "grad_norm": 8.923684120178223, "learning_rate": 2.0523022953611302e-10, "loss": 0.3813, "step": 20548 }, { "epoch": 2.98, "grad_norm": 8.86711597442627, "learning_rate": 2.0203614102021205e-10, "loss": 0.3779, "step": 20549 }, { "epoch": 2.98, "grad_norm": 8.149459838867188, "learning_rate": 1.9886710081795655e-10, "loss": 0.3486, "step": 20550 }, { "epoch": 2.98, "grad_norm": 7.972001075744629, "learning_rate": 1.957231090085054e-10, "loss": 0.3529, "step": 20551 }, { "epoch": 2.98, "grad_norm": 7.9184250831604, "learning_rate": 1.9260416567068448e-10, "loss": 0.3596, "step": 20552 }, { "epoch": 2.98, "grad_norm": 8.69404125213623, "learning_rate": 1.8951027088265348e-10, "loss": 0.3453, "step": 20553 }, { "epoch": 2.98, "grad_norm": 7.359994411468506, "learning_rate": 1.8644142472190593e-10, "loss": 0.3156, "step": 20554 }, { "epoch": 2.98, "grad_norm": 9.571616172790527, "learning_rate": 1.8339762726538034e-10, "loss": 0.3915, "step": 20555 }, { "epoch": 2.98, "grad_norm": 8.617804527282715, "learning_rate": 1.8037887858923796e-10, "loss": 0.2647, "step": 20556 }, { "epoch": 2.98, "grad_norm": 7.573068618774414, "learning_rate": 1.7738517876919602e-10, "loss": 0.3432, "step": 20557 }, { "epoch": 2.98, "grad_norm": 9.865559577941895, "learning_rate": 1.7441652788030559e-10, "loss": 0.4414, "step": 20558 }, { "epoch": 2.98, "grad_norm": 8.74722671508789, "learning_rate": 1.714729259968406e-10, "loss": 0.3829, "step": 20559 }, { "epoch": 2.98, "grad_norm": 7.772888660430908, "learning_rate": 1.6855437319251986e-10, "loss": 0.3231, "step": 20560 }, { "epoch": 2.98, "grad_norm": 8.48880672454834, "learning_rate": 1.6566086954050706e-10, "loss": 0.3368, "step": 20561 }, { "epoch": 2.98, "grad_norm": 8.772960662841797, "learning_rate": 1.6279241511329977e-10, "loss": 0.3062, "step": 20562 }, { "epoch": 2.98, "grad_norm": 9.003371238708496, "learning_rate": 1.599490099827294e-10, "loss": 0.4, "step": 20563 }, { "epoch": 2.98, "grad_norm": 9.990309715270996, "learning_rate": 1.5713065422018334e-10, "loss": 0.3558, "step": 20564 }, { "epoch": 2.98, "grad_norm": 9.24974250793457, "learning_rate": 1.5433734789604969e-10, "loss": 0.3525, "step": 20565 }, { "epoch": 2.98, "grad_norm": 10.772421836853027, "learning_rate": 1.5156909108049455e-10, "loss": 0.4036, "step": 20566 }, { "epoch": 2.98, "grad_norm": 9.627955436706543, "learning_rate": 1.4882588384279582e-10, "loss": 0.365, "step": 20567 }, { "epoch": 2.98, "grad_norm": 10.249504089355469, "learning_rate": 1.4610772625156531e-10, "loss": 0.3536, "step": 20568 }, { "epoch": 2.98, "grad_norm": 8.727087020874023, "learning_rate": 1.4341461837508173e-10, "loss": 0.3614, "step": 20569 }, { "epoch": 2.98, "grad_norm": 8.527536392211914, "learning_rate": 1.4074656028073562e-10, "loss": 0.3781, "step": 20570 }, { "epoch": 2.98, "grad_norm": 7.919356346130371, "learning_rate": 1.381035520354734e-10, "loss": 0.3198, "step": 20571 }, { "epoch": 2.98, "grad_norm": 8.0327730178833, "learning_rate": 1.354855937053534e-10, "loss": 0.35, "step": 20572 }, { "epoch": 2.99, "grad_norm": 9.797980308532715, "learning_rate": 1.3289268535598975e-10, "loss": 0.388, "step": 20573 }, { "epoch": 2.99, "grad_norm": 9.32867431640625, "learning_rate": 1.3032482705244152e-10, "loss": 0.3313, "step": 20574 }, { "epoch": 2.99, "grad_norm": 8.80720329284668, "learning_rate": 1.2778201885899066e-10, "loss": 0.3534, "step": 20575 }, { "epoch": 2.99, "grad_norm": 9.62246036529541, "learning_rate": 1.252642608392529e-10, "loss": 0.4167, "step": 20576 }, { "epoch": 2.99, "grad_norm": 8.990901947021484, "learning_rate": 1.2277155305651098e-10, "loss": 0.3331, "step": 20577 }, { "epoch": 2.99, "grad_norm": 7.968013286590576, "learning_rate": 1.2030389557304844e-10, "loss": 0.3418, "step": 20578 }, { "epoch": 2.99, "grad_norm": 10.777344703674316, "learning_rate": 1.1786128845070465e-10, "loss": 0.3918, "step": 20579 }, { "epoch": 2.99, "grad_norm": 8.42259693145752, "learning_rate": 1.1544373175076394e-10, "loss": 0.4108, "step": 20580 }, { "epoch": 2.99, "grad_norm": 9.321825981140137, "learning_rate": 1.1305122553373347e-10, "loss": 0.3938, "step": 20581 }, { "epoch": 2.99, "grad_norm": 7.667535305023193, "learning_rate": 1.1068376985956529e-10, "loss": 0.3443, "step": 20582 }, { "epoch": 2.99, "grad_norm": 7.924094200134277, "learning_rate": 1.083413647875453e-10, "loss": 0.3151, "step": 20583 }, { "epoch": 2.99, "grad_norm": 7.997232913970947, "learning_rate": 1.0602401037629328e-10, "loss": 0.3362, "step": 20584 }, { "epoch": 2.99, "grad_norm": 8.511903762817383, "learning_rate": 1.0373170668409592e-10, "loss": 0.3186, "step": 20585 }, { "epoch": 2.99, "grad_norm": 8.507040977478027, "learning_rate": 1.0146445376812974e-10, "loss": 0.3218, "step": 20586 }, { "epoch": 2.99, "grad_norm": 10.018357276916504, "learning_rate": 9.922225168534914e-11, "loss": 0.3474, "step": 20587 }, { "epoch": 2.99, "grad_norm": 8.606209754943848, "learning_rate": 9.700510049193144e-11, "loss": 0.3713, "step": 20588 }, { "epoch": 2.99, "grad_norm": 8.515300750732422, "learning_rate": 9.481300024338778e-11, "loss": 0.3933, "step": 20589 }, { "epoch": 2.99, "grad_norm": 8.653502464294434, "learning_rate": 9.264595099456318e-11, "loss": 0.3128, "step": 20590 }, { "epoch": 2.99, "grad_norm": 7.693937301635742, "learning_rate": 9.050395279985856e-11, "loss": 0.3408, "step": 20591 }, { "epoch": 2.99, "grad_norm": 8.077895164489746, "learning_rate": 8.838700571278668e-11, "loss": 0.3121, "step": 20592 }, { "epoch": 2.99, "grad_norm": 7.675490856170654, "learning_rate": 8.629510978663823e-11, "loss": 0.3381, "step": 20593 }, { "epoch": 2.99, "grad_norm": 9.245388984680176, "learning_rate": 8.42282650735937e-11, "loss": 0.3026, "step": 20594 }, { "epoch": 2.99, "grad_norm": 7.725240230560303, "learning_rate": 8.218647162561154e-11, "loss": 0.3184, "step": 20595 }, { "epoch": 2.99, "grad_norm": 7.91410493850708, "learning_rate": 8.016972949365097e-11, "loss": 0.3229, "step": 20596 }, { "epoch": 2.99, "grad_norm": 8.560952186584473, "learning_rate": 7.817803872833816e-11, "loss": 0.361, "step": 20597 }, { "epoch": 2.99, "grad_norm": 9.947739601135254, "learning_rate": 7.621139937963317e-11, "loss": 0.346, "step": 20598 }, { "epoch": 2.99, "grad_norm": 8.50965404510498, "learning_rate": 7.426981149671885e-11, "loss": 0.3395, "step": 20599 }, { "epoch": 2.99, "grad_norm": 7.125328540802002, "learning_rate": 7.235327512822298e-11, "loss": 0.2933, "step": 20600 }, { "epoch": 2.99, "grad_norm": 9.259121894836426, "learning_rate": 7.046179032232923e-11, "loss": 0.4246, "step": 20601 }, { "epoch": 2.99, "grad_norm": 8.609389305114746, "learning_rate": 6.85953571262221e-11, "loss": 0.3213, "step": 20602 }, { "epoch": 2.99, "grad_norm": 12.18664836883545, "learning_rate": 6.675397558664197e-11, "loss": 0.4459, "step": 20603 }, { "epoch": 2.99, "grad_norm": 7.78980016708374, "learning_rate": 6.493764574999616e-11, "loss": 0.3675, "step": 20604 }, { "epoch": 2.99, "grad_norm": 9.146231651306152, "learning_rate": 6.314636766147074e-11, "loss": 0.3791, "step": 20605 }, { "epoch": 2.99, "grad_norm": 8.472667694091797, "learning_rate": 6.138014136614078e-11, "loss": 0.3437, "step": 20606 }, { "epoch": 2.99, "grad_norm": 8.765917778015137, "learning_rate": 5.963896690819314e-11, "loss": 0.4028, "step": 20607 }, { "epoch": 2.99, "grad_norm": 8.491522789001465, "learning_rate": 5.792284433137063e-11, "loss": 0.3226, "step": 20608 }, { "epoch": 2.99, "grad_norm": 8.651792526245117, "learning_rate": 5.6231773678416805e-11, "loss": 0.3309, "step": 20609 }, { "epoch": 2.99, "grad_norm": 8.12099552154541, "learning_rate": 5.456575499185323e-11, "loss": 0.3043, "step": 20610 }, { "epoch": 2.99, "grad_norm": 8.530341148376465, "learning_rate": 5.2924788313424286e-11, "loss": 0.3801, "step": 20611 }, { "epoch": 2.99, "grad_norm": 9.237767219543457, "learning_rate": 5.130887368431924e-11, "loss": 0.3921, "step": 20612 }, { "epoch": 2.99, "grad_norm": 7.954387187957764, "learning_rate": 4.97180111448392e-11, "loss": 0.3664, "step": 20613 }, { "epoch": 2.99, "grad_norm": 8.437204360961914, "learning_rate": 4.815220073484116e-11, "loss": 0.3322, "step": 20614 }, { "epoch": 2.99, "grad_norm": 9.359346389770508, "learning_rate": 4.661144249373805e-11, "loss": 0.3917, "step": 20615 }, { "epoch": 2.99, "grad_norm": 7.511048316955566, "learning_rate": 4.5095736460054604e-11, "loss": 0.3385, "step": 20616 }, { "epoch": 2.99, "grad_norm": 9.387240409851074, "learning_rate": 4.360508267164942e-11, "loss": 0.361, "step": 20617 }, { "epoch": 2.99, "grad_norm": 8.822927474975586, "learning_rate": 4.213948116604804e-11, "loss": 0.3835, "step": 20618 }, { "epoch": 2.99, "grad_norm": 7.670776844024658, "learning_rate": 4.069893197988783e-11, "loss": 0.3246, "step": 20619 }, { "epoch": 2.99, "grad_norm": 8.200258255004883, "learning_rate": 3.928343514914001e-11, "loss": 0.3356, "step": 20620 }, { "epoch": 2.99, "grad_norm": 8.541387557983398, "learning_rate": 3.789299070944274e-11, "loss": 0.3779, "step": 20621 }, { "epoch": 2.99, "grad_norm": 8.729205131530762, "learning_rate": 3.6527598695657026e-11, "loss": 0.3958, "step": 20622 }, { "epoch": 2.99, "grad_norm": 10.99903392791748, "learning_rate": 3.5187259141755684e-11, "loss": 0.3683, "step": 20623 }, { "epoch": 2.99, "grad_norm": 8.79613208770752, "learning_rate": 3.387197208160053e-11, "loss": 0.3368, "step": 20624 }, { "epoch": 2.99, "grad_norm": 8.68280029296875, "learning_rate": 3.2581737547943133e-11, "loss": 0.3909, "step": 20625 }, { "epoch": 2.99, "grad_norm": 9.032149314880371, "learning_rate": 3.1316555573090985e-11, "loss": 0.3115, "step": 20626 }, { "epoch": 2.99, "grad_norm": 7.917364120483398, "learning_rate": 3.007642618890749e-11, "loss": 0.3908, "step": 20627 }, { "epoch": 2.99, "grad_norm": 9.457905769348145, "learning_rate": 2.8861349426478886e-11, "loss": 0.3861, "step": 20628 }, { "epoch": 2.99, "grad_norm": 10.015571594238281, "learning_rate": 2.7671325316003246e-11, "loss": 0.3681, "step": 20629 }, { "epoch": 2.99, "grad_norm": 8.672276496887207, "learning_rate": 2.650635388745659e-11, "loss": 0.2983, "step": 20630 }, { "epoch": 2.99, "grad_norm": 7.777581691741943, "learning_rate": 2.5366435170037782e-11, "loss": 0.3245, "step": 20631 }, { "epoch": 2.99, "grad_norm": 10.074628829956055, "learning_rate": 2.4251569192279553e-11, "loss": 0.3966, "step": 20632 }, { "epoch": 2.99, "grad_norm": 9.079468727111816, "learning_rate": 2.3161755982048503e-11, "loss": 0.3772, "step": 20633 }, { "epoch": 2.99, "grad_norm": 9.654882431030273, "learning_rate": 2.209699556676714e-11, "loss": 0.3349, "step": 20634 }, { "epoch": 2.99, "grad_norm": 9.733240127563477, "learning_rate": 2.1057287973080817e-11, "loss": 0.3308, "step": 20635 }, { "epoch": 2.99, "grad_norm": 7.937615394592285, "learning_rate": 2.004263322696875e-11, "loss": 0.3474, "step": 20636 }, { "epoch": 2.99, "grad_norm": 8.096454620361328, "learning_rate": 1.9053031353855054e-11, "loss": 0.3318, "step": 20637 }, { "epoch": 2.99, "grad_norm": 9.501262664794922, "learning_rate": 1.8088482378608717e-11, "loss": 0.3865, "step": 20638 }, { "epoch": 2.99, "grad_norm": 8.918780326843262, "learning_rate": 1.7148986325432602e-11, "loss": 0.3658, "step": 20639 }, { "epoch": 2.99, "grad_norm": 9.089573860168457, "learning_rate": 1.6234543217641395e-11, "loss": 0.4022, "step": 20640 }, { "epoch": 2.99, "grad_norm": 8.881893157958984, "learning_rate": 1.5345153078438756e-11, "loss": 0.3605, "step": 20641 }, { "epoch": 3.0, "grad_norm": 7.485718727111816, "learning_rate": 1.44808159298071e-11, "loss": 0.3079, "step": 20642 }, { "epoch": 3.0, "grad_norm": 9.102293968200684, "learning_rate": 1.3641531793617822e-11, "loss": 0.3694, "step": 20643 }, { "epoch": 3.0, "grad_norm": 7.042499542236328, "learning_rate": 1.2827300690854137e-11, "loss": 0.2781, "step": 20644 }, { "epoch": 3.0, "grad_norm": 9.157890319824219, "learning_rate": 1.2038122641944148e-11, "loss": 0.3154, "step": 20645 }, { "epoch": 3.0, "grad_norm": 8.068727493286133, "learning_rate": 1.1273997666538804e-11, "loss": 0.3488, "step": 20646 }, { "epoch": 3.0, "grad_norm": 8.058564186096191, "learning_rate": 1.053492578384496e-11, "loss": 0.3491, "step": 20647 }, { "epoch": 3.0, "grad_norm": 8.269947052001953, "learning_rate": 9.820907012403345e-12, "loss": 0.3356, "step": 20648 }, { "epoch": 3.0, "grad_norm": 9.894759178161621, "learning_rate": 9.131941370088548e-12, "loss": 0.3909, "step": 20649 }, { "epoch": 3.0, "grad_norm": 8.804544448852539, "learning_rate": 8.468028874109023e-12, "loss": 0.3217, "step": 20650 }, { "epoch": 3.0, "grad_norm": 9.065418243408203, "learning_rate": 7.829169541229141e-12, "loss": 0.3893, "step": 20651 }, { "epoch": 3.0, "grad_norm": 8.846603393554688, "learning_rate": 7.2153633873250905e-12, "loss": 0.3916, "step": 20652 }, { "epoch": 3.0, "grad_norm": 8.472325325012207, "learning_rate": 6.626610427828971e-12, "loss": 0.3292, "step": 20653 }, { "epoch": 3.0, "grad_norm": 8.600539207458496, "learning_rate": 6.0629106775067495e-12, "loss": 0.3529, "step": 20654 }, { "epoch": 3.0, "grad_norm": 9.133834838867188, "learning_rate": 5.524264150458257e-12, "loss": 0.3646, "step": 20655 }, { "epoch": 3.0, "grad_norm": 8.734662055969238, "learning_rate": 5.0106708601171946e-12, "loss": 0.3753, "step": 20656 }, { "epoch": 3.0, "grad_norm": 9.302593231201172, "learning_rate": 4.522130819473169e-12, "loss": 0.3394, "step": 20657 }, { "epoch": 3.0, "grad_norm": 7.16493034362793, "learning_rate": 4.058644040738635e-12, "loss": 0.3405, "step": 20658 }, { "epoch": 3.0, "grad_norm": 8.703243255615234, "learning_rate": 3.620210535459911e-12, "loss": 0.4054, "step": 20659 }, { "epoch": 3.0, "grad_norm": 9.888067245483398, "learning_rate": 3.2068303146282062e-12, "loss": 0.3574, "step": 20660 }, { "epoch": 3.0, "grad_norm": 9.217949867248535, "learning_rate": 2.818503388568594e-12, "loss": 0.28, "step": 20661 }, { "epoch": 3.0, "grad_norm": 8.643386840820312, "learning_rate": 2.455229767162059e-12, "loss": 0.3746, "step": 20662 }, { "epoch": 3.0, "grad_norm": 7.786724090576172, "learning_rate": 2.1170094594014087e-12, "loss": 0.3075, "step": 20663 }, { "epoch": 3.0, "grad_norm": 8.558117866516113, "learning_rate": 1.8038424737243374e-12, "loss": 0.3536, "step": 20664 }, { "epoch": 3.0, "grad_norm": 7.576511859893799, "learning_rate": 1.5157288181244509e-12, "loss": 0.319, "step": 20665 }, { "epoch": 3.0, "grad_norm": 8.646366119384766, "learning_rate": 1.2526684995961544e-12, "loss": 0.3616, "step": 20666 }, { "epoch": 3.0, "grad_norm": 7.713961601257324, "learning_rate": 1.0146615249118084e-12, "loss": 0.3291, "step": 20667 }, { "epoch": 3.0, "grad_norm": 8.721087455749512, "learning_rate": 8.017079000666171e-13, "loss": 0.3493, "step": 20668 }, { "epoch": 3.0, "grad_norm": 8.762423515319824, "learning_rate": 6.138076301676065e-13, "loss": 0.3528, "step": 20669 }, { "epoch": 3.0, "grad_norm": 9.240011215209961, "learning_rate": 4.509607200997578e-13, "loss": 0.3793, "step": 20670 }, { "epoch": 3.0, "grad_norm": 10.898151397705078, "learning_rate": 3.131671739708963e-13, "loss": 0.423, "step": 20671 }, { "epoch": 3.0, "grad_norm": 8.383061408996582, "learning_rate": 2.0042699511169104e-13, "loss": 0.3215, "step": 20672 }, { "epoch": 3.0, "grad_norm": 8.743500709533691, "learning_rate": 1.1274018640872185e-13, "loss": 0.3513, "step": 20673 }, { "epoch": 3.0, "grad_norm": 7.6378679275512695, "learning_rate": 5.0106750082434815e-14, "loss": 0.2872, "step": 20674 }, { "epoch": 3.0, "grad_norm": 8.252094268798828, "learning_rate": 1.2526687576119855e-14, "loss": 0.3349, "step": 20675 }, { "epoch": 3.0, "grad_norm": 7.520442962646484, "learning_rate": 0.0, "loss": 0.2464, "step": 20676 }, { "epoch": 3.0, "step": 20676, "total_flos": 2.8905309761351713e+19, "train_loss": 0.5445510355165074, "train_runtime": 243367.5838, "train_samples_per_second": 5.437, "train_steps_per_second": 0.085 } ], "logging_steps": 1.0, "max_steps": 20676, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 2.8905309761351713e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }