{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 6533, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 383.1503991853064, "learning_rate": 1.0204081632653061e-07, "loss": 3.16, "step": 1 }, { "epoch": 0.0, "grad_norm": 1115.042586838023, "learning_rate": 2.0408163265306121e-07, "loss": 3.4515, "step": 2 }, { "epoch": 0.0, "grad_norm": 641.1581033180005, "learning_rate": 3.0612244897959183e-07, "loss": 3.4183, "step": 3 }, { "epoch": 0.0, "grad_norm": 543.9001930338954, "learning_rate": 4.0816326530612243e-07, "loss": 3.1405, "step": 4 }, { "epoch": 0.0, "grad_norm": 415.6700527460039, "learning_rate": 5.102040816326531e-07, "loss": 2.9573, "step": 5 }, { "epoch": 0.0, "grad_norm": 560.8177502149401, "learning_rate": 6.122448979591837e-07, "loss": 3.0618, "step": 6 }, { "epoch": 0.0, "grad_norm": 549.9535790740002, "learning_rate": 7.142857142857143e-07, "loss": 2.787, "step": 7 }, { "epoch": 0.0, "grad_norm": 135.6880950548416, "learning_rate": 8.163265306122449e-07, "loss": 2.1078, "step": 8 }, { "epoch": 0.0, "grad_norm": 304.45317074621744, "learning_rate": 9.183673469387756e-07, "loss": 2.0253, "step": 9 }, { "epoch": 0.0, "grad_norm": 129.6075967509114, "learning_rate": 1.0204081632653063e-06, "loss": 1.987, "step": 10 }, { "epoch": 0.0, "grad_norm": 116.64503619601689, "learning_rate": 1.122448979591837e-06, "loss": 1.7659, "step": 11 }, { "epoch": 0.0, "grad_norm": 82.57909679030715, "learning_rate": 1.2244897959183673e-06, "loss": 1.7313, "step": 12 }, { "epoch": 0.0, "grad_norm": 51.7873071107413, "learning_rate": 1.3265306122448982e-06, "loss": 1.6111, "step": 13 }, { "epoch": 0.0, "grad_norm": 21.55146603646816, "learning_rate": 1.4285714285714286e-06, "loss": 0.6476, "step": 14 }, { "epoch": 0.0, "grad_norm": 52.146128170413405, "learning_rate": 1.5306122448979593e-06, "loss": 1.4587, "step": 15 }, { "epoch": 0.0, "grad_norm": 25.018684442954445, "learning_rate": 1.6326530612244897e-06, "loss": 1.5048, "step": 16 }, { "epoch": 0.0, "grad_norm": 10.633909350728741, "learning_rate": 1.7346938775510206e-06, "loss": 0.58, "step": 17 }, { "epoch": 0.0, "grad_norm": 31.3525327981405, "learning_rate": 1.8367346938775512e-06, "loss": 1.257, "step": 18 }, { "epoch": 0.0, "grad_norm": 43.10854106994972, "learning_rate": 1.938775510204082e-06, "loss": 1.2033, "step": 19 }, { "epoch": 0.0, "grad_norm": 19.144610314079856, "learning_rate": 2.0408163265306125e-06, "loss": 1.1375, "step": 20 }, { "epoch": 0.0, "grad_norm": 24.783699466031027, "learning_rate": 2.1428571428571427e-06, "loss": 1.1003, "step": 21 }, { "epoch": 0.0, "grad_norm": 11.60475253722536, "learning_rate": 2.244897959183674e-06, "loss": 1.2054, "step": 22 }, { "epoch": 0.0, "grad_norm": 10.340451063129958, "learning_rate": 2.3469387755102044e-06, "loss": 1.055, "step": 23 }, { "epoch": 0.0, "grad_norm": 10.793621550631057, "learning_rate": 2.4489795918367347e-06, "loss": 1.1943, "step": 24 }, { "epoch": 0.0, "grad_norm": 22.91654748783573, "learning_rate": 2.5510204081632657e-06, "loss": 1.1807, "step": 25 }, { "epoch": 0.0, "grad_norm": 11.665266982661313, "learning_rate": 2.6530612244897964e-06, "loss": 1.1996, "step": 26 }, { "epoch": 0.0, "grad_norm": 10.279207516291493, "learning_rate": 2.7551020408163266e-06, "loss": 1.1656, "step": 27 }, { "epoch": 0.0, "grad_norm": 17.967120212081813, "learning_rate": 2.8571428571428573e-06, "loss": 1.1532, "step": 28 }, { "epoch": 0.0, "grad_norm": 10.383928985415782, "learning_rate": 2.959183673469388e-06, "loss": 0.9899, "step": 29 }, { "epoch": 0.0, "grad_norm": 18.203716579957373, "learning_rate": 3.0612244897959185e-06, "loss": 1.0944, "step": 30 }, { "epoch": 0.0, "grad_norm": 10.071417491040497, "learning_rate": 3.1632653061224496e-06, "loss": 1.1039, "step": 31 }, { "epoch": 0.0, "grad_norm": 11.075535174989573, "learning_rate": 3.2653061224489794e-06, "loss": 1.0426, "step": 32 }, { "epoch": 0.01, "grad_norm": 8.406595669779419, "learning_rate": 3.3673469387755105e-06, "loss": 1.0862, "step": 33 }, { "epoch": 0.01, "grad_norm": 5.618904672536877, "learning_rate": 3.469387755102041e-06, "loss": 0.4901, "step": 34 }, { "epoch": 0.01, "grad_norm": 22.979117055215557, "learning_rate": 3.5714285714285718e-06, "loss": 1.1253, "step": 35 }, { "epoch": 0.01, "grad_norm": 98.03089037716222, "learning_rate": 3.6734693877551024e-06, "loss": 1.1556, "step": 36 }, { "epoch": 0.01, "grad_norm": 11.287036563173132, "learning_rate": 3.7755102040816327e-06, "loss": 1.0596, "step": 37 }, { "epoch": 0.01, "grad_norm": 13.815097934554741, "learning_rate": 3.877551020408164e-06, "loss": 1.0359, "step": 38 }, { "epoch": 0.01, "grad_norm": 13.600743070021583, "learning_rate": 3.979591836734694e-06, "loss": 1.0887, "step": 39 }, { "epoch": 0.01, "grad_norm": 9.078085686869478, "learning_rate": 4.081632653061225e-06, "loss": 1.0267, "step": 40 }, { "epoch": 0.01, "grad_norm": 8.085579534504056, "learning_rate": 4.183673469387755e-06, "loss": 1.0953, "step": 41 }, { "epoch": 0.01, "grad_norm": 9.251690714050342, "learning_rate": 4.2857142857142855e-06, "loss": 1.0291, "step": 42 }, { "epoch": 0.01, "grad_norm": 10.157487900257816, "learning_rate": 4.3877551020408165e-06, "loss": 1.1289, "step": 43 }, { "epoch": 0.01, "grad_norm": 15.394745194540516, "learning_rate": 4.489795918367348e-06, "loss": 1.1724, "step": 44 }, { "epoch": 0.01, "grad_norm": 8.206250385388731, "learning_rate": 4.591836734693878e-06, "loss": 1.0897, "step": 45 }, { "epoch": 0.01, "grad_norm": 11.385889193455325, "learning_rate": 4.693877551020409e-06, "loss": 1.0748, "step": 46 }, { "epoch": 0.01, "grad_norm": 8.013036055651966, "learning_rate": 4.795918367346939e-06, "loss": 1.0592, "step": 47 }, { "epoch": 0.01, "grad_norm": 6.540522265960379, "learning_rate": 4.897959183673469e-06, "loss": 0.9947, "step": 48 }, { "epoch": 0.01, "grad_norm": 5.128507543319763, "learning_rate": 5e-06, "loss": 0.4851, "step": 49 }, { "epoch": 0.01, "grad_norm": 7.456741645011198, "learning_rate": 5.1020408163265315e-06, "loss": 1.1242, "step": 50 }, { "epoch": 0.01, "grad_norm": 9.073951090190663, "learning_rate": 5.204081632653062e-06, "loss": 1.1371, "step": 51 }, { "epoch": 0.01, "grad_norm": 7.848944210654246, "learning_rate": 5.306122448979593e-06, "loss": 0.9847, "step": 52 }, { "epoch": 0.01, "grad_norm": 6.814113508474519, "learning_rate": 5.408163265306123e-06, "loss": 0.965, "step": 53 }, { "epoch": 0.01, "grad_norm": 6.075740772611151, "learning_rate": 5.510204081632653e-06, "loss": 0.4762, "step": 54 }, { "epoch": 0.01, "grad_norm": 8.253826117056938, "learning_rate": 5.6122448979591834e-06, "loss": 1.0542, "step": 55 }, { "epoch": 0.01, "grad_norm": 8.609978272010776, "learning_rate": 5.7142857142857145e-06, "loss": 1.0704, "step": 56 }, { "epoch": 0.01, "grad_norm": 9.397718068371473, "learning_rate": 5.816326530612246e-06, "loss": 1.1626, "step": 57 }, { "epoch": 0.01, "grad_norm": 7.260133946689537, "learning_rate": 5.918367346938776e-06, "loss": 1.0113, "step": 58 }, { "epoch": 0.01, "grad_norm": 3.852944108730934, "learning_rate": 6.020408163265307e-06, "loss": 0.4874, "step": 59 }, { "epoch": 0.01, "grad_norm": 6.178769332745159, "learning_rate": 6.122448979591837e-06, "loss": 1.0644, "step": 60 }, { "epoch": 0.01, "grad_norm": 7.103892360369785, "learning_rate": 6.224489795918368e-06, "loss": 1.0928, "step": 61 }, { "epoch": 0.01, "grad_norm": 7.494779873667326, "learning_rate": 6.326530612244899e-06, "loss": 1.0097, "step": 62 }, { "epoch": 0.01, "grad_norm": 6.629387087808804, "learning_rate": 6.4285714285714295e-06, "loss": 1.0211, "step": 63 }, { "epoch": 0.01, "grad_norm": 7.464978091176936, "learning_rate": 6.530612244897959e-06, "loss": 1.0977, "step": 64 }, { "epoch": 0.01, "grad_norm": 7.660183593852527, "learning_rate": 6.63265306122449e-06, "loss": 1.0661, "step": 65 }, { "epoch": 0.01, "grad_norm": 7.831455627108557, "learning_rate": 6.734693877551021e-06, "loss": 1.0992, "step": 66 }, { "epoch": 0.01, "grad_norm": 6.722994865015469, "learning_rate": 6.836734693877551e-06, "loss": 1.1489, "step": 67 }, { "epoch": 0.01, "grad_norm": 7.8110902501706425, "learning_rate": 6.938775510204082e-06, "loss": 1.0966, "step": 68 }, { "epoch": 0.01, "grad_norm": 7.14628522025491, "learning_rate": 7.0408163265306125e-06, "loss": 0.9751, "step": 69 }, { "epoch": 0.01, "grad_norm": 6.698256800772039, "learning_rate": 7.1428571428571436e-06, "loss": 0.9841, "step": 70 }, { "epoch": 0.01, "grad_norm": 6.727266458576524, "learning_rate": 7.244897959183675e-06, "loss": 1.078, "step": 71 }, { "epoch": 0.01, "grad_norm": 9.712483103175598, "learning_rate": 7.346938775510205e-06, "loss": 0.4774, "step": 72 }, { "epoch": 0.01, "grad_norm": 7.267739903860731, "learning_rate": 7.448979591836736e-06, "loss": 1.0613, "step": 73 }, { "epoch": 0.01, "grad_norm": 11.970987660453837, "learning_rate": 7.551020408163265e-06, "loss": 1.0343, "step": 74 }, { "epoch": 0.01, "grad_norm": 6.088617564760351, "learning_rate": 7.653061224489796e-06, "loss": 0.9876, "step": 75 }, { "epoch": 0.01, "grad_norm": 6.702281289345013, "learning_rate": 7.755102040816327e-06, "loss": 1.055, "step": 76 }, { "epoch": 0.01, "grad_norm": 6.672033119575265, "learning_rate": 7.857142857142858e-06, "loss": 0.9966, "step": 77 }, { "epoch": 0.01, "grad_norm": 5.069957763879359, "learning_rate": 7.959183673469388e-06, "loss": 0.5214, "step": 78 }, { "epoch": 0.01, "grad_norm": 10.247445520922536, "learning_rate": 8.06122448979592e-06, "loss": 1.0514, "step": 79 }, { "epoch": 0.01, "grad_norm": 11.516511852647083, "learning_rate": 8.16326530612245e-06, "loss": 1.0861, "step": 80 }, { "epoch": 0.01, "grad_norm": 8.052749314346663, "learning_rate": 8.26530612244898e-06, "loss": 1.016, "step": 81 }, { "epoch": 0.01, "grad_norm": 6.750995746740932, "learning_rate": 8.36734693877551e-06, "loss": 1.1089, "step": 82 }, { "epoch": 0.01, "grad_norm": 8.713321176065724, "learning_rate": 8.469387755102042e-06, "loss": 1.0066, "step": 83 }, { "epoch": 0.01, "grad_norm": 7.794509522449217, "learning_rate": 8.571428571428571e-06, "loss": 1.012, "step": 84 }, { "epoch": 0.01, "grad_norm": 6.664116358377716, "learning_rate": 8.673469387755103e-06, "loss": 1.0805, "step": 85 }, { "epoch": 0.01, "grad_norm": 6.320697850800005, "learning_rate": 8.775510204081633e-06, "loss": 1.0494, "step": 86 }, { "epoch": 0.01, "grad_norm": 7.006680080855596, "learning_rate": 8.877551020408163e-06, "loss": 1.0981, "step": 87 }, { "epoch": 0.01, "grad_norm": 6.213977754985643, "learning_rate": 8.979591836734695e-06, "loss": 1.0488, "step": 88 }, { "epoch": 0.01, "grad_norm": 5.977104454473886, "learning_rate": 9.081632653061225e-06, "loss": 0.9923, "step": 89 }, { "epoch": 0.01, "grad_norm": 26.93170641637084, "learning_rate": 9.183673469387756e-06, "loss": 1.1516, "step": 90 }, { "epoch": 0.01, "grad_norm": 6.731873248521301, "learning_rate": 9.285714285714288e-06, "loss": 1.0429, "step": 91 }, { "epoch": 0.01, "grad_norm": 6.4993308961508784, "learning_rate": 9.387755102040818e-06, "loss": 1.0329, "step": 92 }, { "epoch": 0.01, "grad_norm": 6.509109125779879, "learning_rate": 9.489795918367348e-06, "loss": 1.028, "step": 93 }, { "epoch": 0.01, "grad_norm": 7.2065413570334895, "learning_rate": 9.591836734693878e-06, "loss": 1.0646, "step": 94 }, { "epoch": 0.01, "grad_norm": 9.809366591371024, "learning_rate": 9.693877551020408e-06, "loss": 1.1289, "step": 95 }, { "epoch": 0.01, "grad_norm": 7.476657106326629, "learning_rate": 9.795918367346939e-06, "loss": 1.0345, "step": 96 }, { "epoch": 0.01, "grad_norm": 6.927969816535037, "learning_rate": 9.89795918367347e-06, "loss": 0.996, "step": 97 }, { "epoch": 0.02, "grad_norm": 59.64526210339528, "learning_rate": 1e-05, "loss": 1.0362, "step": 98 }, { "epoch": 0.02, "grad_norm": 6.401085627447702, "learning_rate": 1.0102040816326531e-05, "loss": 1.0387, "step": 99 }, { "epoch": 0.02, "grad_norm": 6.258010610505343, "learning_rate": 1.0204081632653063e-05, "loss": 1.145, "step": 100 }, { "epoch": 0.02, "grad_norm": 5.661261424729024, "learning_rate": 1.0306122448979591e-05, "loss": 1.1126, "step": 101 }, { "epoch": 0.02, "grad_norm": 6.257632267247965, "learning_rate": 1.0408163265306123e-05, "loss": 0.9783, "step": 102 }, { "epoch": 0.02, "grad_norm": 5.410703256358511, "learning_rate": 1.0510204081632654e-05, "loss": 1.0303, "step": 103 }, { "epoch": 0.02, "grad_norm": 6.848851190925594, "learning_rate": 1.0612244897959186e-05, "loss": 1.1671, "step": 104 }, { "epoch": 0.02, "grad_norm": 6.5957945532873214, "learning_rate": 1.0714285714285714e-05, "loss": 1.1229, "step": 105 }, { "epoch": 0.02, "grad_norm": 7.606270565711428, "learning_rate": 1.0816326530612246e-05, "loss": 1.0775, "step": 106 }, { "epoch": 0.02, "grad_norm": 6.7030893221224215, "learning_rate": 1.0918367346938776e-05, "loss": 1.1389, "step": 107 }, { "epoch": 0.02, "grad_norm": 6.739502719203268, "learning_rate": 1.1020408163265306e-05, "loss": 1.0237, "step": 108 }, { "epoch": 0.02, "grad_norm": 5.301768842909257, "learning_rate": 1.1122448979591838e-05, "loss": 0.9889, "step": 109 }, { "epoch": 0.02, "grad_norm": 7.18146117465339, "learning_rate": 1.1224489795918367e-05, "loss": 1.0543, "step": 110 }, { "epoch": 0.02, "grad_norm": 6.047654607041304, "learning_rate": 1.1326530612244899e-05, "loss": 1.09, "step": 111 }, { "epoch": 0.02, "grad_norm": 7.637823055554012, "learning_rate": 1.1428571428571429e-05, "loss": 1.0977, "step": 112 }, { "epoch": 0.02, "grad_norm": 6.281759210863109, "learning_rate": 1.1530612244897961e-05, "loss": 1.1304, "step": 113 }, { "epoch": 0.02, "grad_norm": 5.552251083233848, "learning_rate": 1.1632653061224491e-05, "loss": 1.0337, "step": 114 }, { "epoch": 0.02, "grad_norm": 6.73550542774755, "learning_rate": 1.1734693877551021e-05, "loss": 1.0381, "step": 115 }, { "epoch": 0.02, "grad_norm": 5.767902308250887, "learning_rate": 1.1836734693877552e-05, "loss": 1.0879, "step": 116 }, { "epoch": 0.02, "grad_norm": 5.813147914121843, "learning_rate": 1.1938775510204084e-05, "loss": 1.0883, "step": 117 }, { "epoch": 0.02, "grad_norm": 6.756174166724238, "learning_rate": 1.2040816326530614e-05, "loss": 0.9809, "step": 118 }, { "epoch": 0.02, "grad_norm": 5.362500089300721, "learning_rate": 1.2142857142857142e-05, "loss": 1.0745, "step": 119 }, { "epoch": 0.02, "grad_norm": 6.699334275929911, "learning_rate": 1.2244897959183674e-05, "loss": 1.0208, "step": 120 }, { "epoch": 0.02, "grad_norm": 5.953344832033511, "learning_rate": 1.2346938775510204e-05, "loss": 0.9662, "step": 121 }, { "epoch": 0.02, "grad_norm": 6.090119536112503, "learning_rate": 1.2448979591836736e-05, "loss": 1.1166, "step": 122 }, { "epoch": 0.02, "grad_norm": 6.724578786244247, "learning_rate": 1.2551020408163267e-05, "loss": 1.0081, "step": 123 }, { "epoch": 0.02, "grad_norm": 6.82491736656003, "learning_rate": 1.2653061224489798e-05, "loss": 1.1457, "step": 124 }, { "epoch": 0.02, "grad_norm": 6.253776774271056, "learning_rate": 1.2755102040816327e-05, "loss": 1.1222, "step": 125 }, { "epoch": 0.02, "grad_norm": 6.271747960540945, "learning_rate": 1.2857142857142859e-05, "loss": 1.1013, "step": 126 }, { "epoch": 0.02, "grad_norm": 5.69741315980313, "learning_rate": 1.2959183673469389e-05, "loss": 1.0754, "step": 127 }, { "epoch": 0.02, "grad_norm": 5.4875349314231565, "learning_rate": 1.3061224489795918e-05, "loss": 1.102, "step": 128 }, { "epoch": 0.02, "grad_norm": 6.038528782577277, "learning_rate": 1.316326530612245e-05, "loss": 1.1194, "step": 129 }, { "epoch": 0.02, "grad_norm": 5.606252307014983, "learning_rate": 1.326530612244898e-05, "loss": 1.1034, "step": 130 }, { "epoch": 0.02, "grad_norm": 5.327163657525908, "learning_rate": 1.3367346938775512e-05, "loss": 1.2087, "step": 131 }, { "epoch": 0.02, "grad_norm": 6.280116871322719, "learning_rate": 1.3469387755102042e-05, "loss": 1.0586, "step": 132 }, { "epoch": 0.02, "grad_norm": 6.6456091927763365, "learning_rate": 1.3571428571428574e-05, "loss": 1.0209, "step": 133 }, { "epoch": 0.02, "grad_norm": 6.558331129509311, "learning_rate": 1.3673469387755102e-05, "loss": 1.1321, "step": 134 }, { "epoch": 0.02, "grad_norm": 5.498934702458065, "learning_rate": 1.3775510204081634e-05, "loss": 1.0218, "step": 135 }, { "epoch": 0.02, "grad_norm": 5.339930806582817, "learning_rate": 1.3877551020408165e-05, "loss": 1.0357, "step": 136 }, { "epoch": 0.02, "grad_norm": 5.454047705726223, "learning_rate": 1.3979591836734696e-05, "loss": 0.9997, "step": 137 }, { "epoch": 0.02, "grad_norm": 5.979588983206454, "learning_rate": 1.4081632653061225e-05, "loss": 1.1083, "step": 138 }, { "epoch": 0.02, "grad_norm": 5.50575669433855, "learning_rate": 1.4183673469387755e-05, "loss": 1.0733, "step": 139 }, { "epoch": 0.02, "grad_norm": 7.352196459546808, "learning_rate": 1.4285714285714287e-05, "loss": 1.0983, "step": 140 }, { "epoch": 0.02, "grad_norm": 5.932749572950468, "learning_rate": 1.4387755102040817e-05, "loss": 1.1568, "step": 141 }, { "epoch": 0.02, "grad_norm": 6.83481653533095, "learning_rate": 1.448979591836735e-05, "loss": 1.1989, "step": 142 }, { "epoch": 0.02, "grad_norm": 6.2113642259074915, "learning_rate": 1.4591836734693878e-05, "loss": 1.0765, "step": 143 }, { "epoch": 0.02, "grad_norm": 5.559171013530669, "learning_rate": 1.469387755102041e-05, "loss": 1.2268, "step": 144 }, { "epoch": 0.02, "grad_norm": 6.313659971812374, "learning_rate": 1.479591836734694e-05, "loss": 1.0467, "step": 145 }, { "epoch": 0.02, "grad_norm": 5.643371072712547, "learning_rate": 1.4897959183673472e-05, "loss": 1.1269, "step": 146 }, { "epoch": 0.02, "grad_norm": 6.021005714047092, "learning_rate": 1.5000000000000002e-05, "loss": 1.1134, "step": 147 }, { "epoch": 0.02, "grad_norm": 6.373077262357572, "learning_rate": 1.510204081632653e-05, "loss": 1.0595, "step": 148 }, { "epoch": 0.02, "grad_norm": 6.5019027656338215, "learning_rate": 1.5204081632653063e-05, "loss": 1.0591, "step": 149 }, { "epoch": 0.02, "grad_norm": 5.834636688292023, "learning_rate": 1.530612244897959e-05, "loss": 1.0933, "step": 150 }, { "epoch": 0.02, "grad_norm": 5.277045405401534, "learning_rate": 1.5408163265306123e-05, "loss": 1.0802, "step": 151 }, { "epoch": 0.02, "grad_norm": 6.249863285521951, "learning_rate": 1.5510204081632655e-05, "loss": 1.0813, "step": 152 }, { "epoch": 0.02, "grad_norm": 5.709586843024497, "learning_rate": 1.5612244897959187e-05, "loss": 1.0674, "step": 153 }, { "epoch": 0.02, "grad_norm": 5.445331032824768, "learning_rate": 1.5714285714285715e-05, "loss": 0.9501, "step": 154 }, { "epoch": 0.02, "grad_norm": 5.36900789371636, "learning_rate": 1.5816326530612247e-05, "loss": 1.1602, "step": 155 }, { "epoch": 0.02, "grad_norm": 5.647764431722952, "learning_rate": 1.5918367346938776e-05, "loss": 1.1474, "step": 156 }, { "epoch": 0.02, "grad_norm": 5.312425415130779, "learning_rate": 1.6020408163265308e-05, "loss": 1.0233, "step": 157 }, { "epoch": 0.02, "grad_norm": 5.383622524455289, "learning_rate": 1.612244897959184e-05, "loss": 1.1282, "step": 158 }, { "epoch": 0.02, "grad_norm": 5.665115899124785, "learning_rate": 1.6224489795918368e-05, "loss": 1.0337, "step": 159 }, { "epoch": 0.02, "grad_norm": 5.249438793574112, "learning_rate": 1.63265306122449e-05, "loss": 1.131, "step": 160 }, { "epoch": 0.02, "grad_norm": 5.182023365734747, "learning_rate": 1.642857142857143e-05, "loss": 1.1596, "step": 161 }, { "epoch": 0.02, "grad_norm": 5.233978778860636, "learning_rate": 1.653061224489796e-05, "loss": 1.0368, "step": 162 }, { "epoch": 0.02, "grad_norm": 5.211250025821531, "learning_rate": 1.6632653061224492e-05, "loss": 1.1777, "step": 163 }, { "epoch": 0.03, "grad_norm": 5.193660311427835, "learning_rate": 1.673469387755102e-05, "loss": 1.0773, "step": 164 }, { "epoch": 0.03, "grad_norm": 5.667021464427709, "learning_rate": 1.6836734693877553e-05, "loss": 1.1726, "step": 165 }, { "epoch": 0.03, "grad_norm": 6.144632567111517, "learning_rate": 1.6938775510204085e-05, "loss": 1.028, "step": 166 }, { "epoch": 0.03, "grad_norm": 5.419125839731695, "learning_rate": 1.7040816326530613e-05, "loss": 1.0666, "step": 167 }, { "epoch": 0.03, "grad_norm": 5.5162703152902735, "learning_rate": 1.7142857142857142e-05, "loss": 0.9476, "step": 168 }, { "epoch": 0.03, "grad_norm": 5.534476185280114, "learning_rate": 1.7244897959183674e-05, "loss": 1.0293, "step": 169 }, { "epoch": 0.03, "grad_norm": 5.2943079469894, "learning_rate": 1.7346938775510206e-05, "loss": 1.0758, "step": 170 }, { "epoch": 0.03, "grad_norm": 6.076390118325341, "learning_rate": 1.7448979591836738e-05, "loss": 1.204, "step": 171 }, { "epoch": 0.03, "grad_norm": 5.087148129774895, "learning_rate": 1.7551020408163266e-05, "loss": 1.1198, "step": 172 }, { "epoch": 0.03, "grad_norm": 5.071546224881873, "learning_rate": 1.7653061224489798e-05, "loss": 1.0729, "step": 173 }, { "epoch": 0.03, "grad_norm": 4.934479929849503, "learning_rate": 1.7755102040816327e-05, "loss": 1.1263, "step": 174 }, { "epoch": 0.03, "grad_norm": 5.5097148641155105, "learning_rate": 1.785714285714286e-05, "loss": 1.0998, "step": 175 }, { "epoch": 0.03, "grad_norm": 4.933561500487126, "learning_rate": 1.795918367346939e-05, "loss": 1.0783, "step": 176 }, { "epoch": 0.03, "grad_norm": 5.1969610822139884, "learning_rate": 1.806122448979592e-05, "loss": 1.0884, "step": 177 }, { "epoch": 0.03, "grad_norm": 5.219171188346219, "learning_rate": 1.816326530612245e-05, "loss": 1.139, "step": 178 }, { "epoch": 0.03, "grad_norm": 5.238974571818712, "learning_rate": 1.826530612244898e-05, "loss": 1.096, "step": 179 }, { "epoch": 0.03, "grad_norm": 5.321624515588689, "learning_rate": 1.836734693877551e-05, "loss": 1.1538, "step": 180 }, { "epoch": 0.03, "grad_norm": 6.287719028369076, "learning_rate": 1.8469387755102043e-05, "loss": 1.1504, "step": 181 }, { "epoch": 0.03, "grad_norm": 4.816101595539008, "learning_rate": 1.8571428571428575e-05, "loss": 1.0856, "step": 182 }, { "epoch": 0.03, "grad_norm": 5.421835257556857, "learning_rate": 1.8673469387755104e-05, "loss": 1.115, "step": 183 }, { "epoch": 0.03, "grad_norm": 5.226520103966431, "learning_rate": 1.8775510204081636e-05, "loss": 1.1927, "step": 184 }, { "epoch": 0.03, "grad_norm": 4.957358233989192, "learning_rate": 1.8877551020408164e-05, "loss": 1.067, "step": 185 }, { "epoch": 0.03, "grad_norm": 4.884140241314185, "learning_rate": 1.8979591836734696e-05, "loss": 1.0874, "step": 186 }, { "epoch": 0.03, "grad_norm": 4.744041077689365, "learning_rate": 1.9081632653061225e-05, "loss": 1.0818, "step": 187 }, { "epoch": 0.03, "grad_norm": 5.5741498454057234, "learning_rate": 1.9183673469387756e-05, "loss": 1.0791, "step": 188 }, { "epoch": 0.03, "grad_norm": 6.252343995041189, "learning_rate": 1.928571428571429e-05, "loss": 1.1573, "step": 189 }, { "epoch": 0.03, "grad_norm": 5.436600844797034, "learning_rate": 1.9387755102040817e-05, "loss": 1.0886, "step": 190 }, { "epoch": 0.03, "grad_norm": 5.686800742042656, "learning_rate": 1.948979591836735e-05, "loss": 1.0898, "step": 191 }, { "epoch": 0.03, "grad_norm": 5.3941981721298164, "learning_rate": 1.9591836734693877e-05, "loss": 1.1444, "step": 192 }, { "epoch": 0.03, "grad_norm": 5.043454999129311, "learning_rate": 1.969387755102041e-05, "loss": 1.0928, "step": 193 }, { "epoch": 0.03, "grad_norm": 5.371089443369653, "learning_rate": 1.979591836734694e-05, "loss": 1.1281, "step": 194 }, { "epoch": 0.03, "grad_norm": 5.203990082279683, "learning_rate": 1.9897959183673473e-05, "loss": 1.1808, "step": 195 }, { "epoch": 0.03, "grad_norm": 4.997545641343556, "learning_rate": 2e-05, "loss": 1.144, "step": 196 }, { "epoch": 0.03, "grad_norm": 5.456843142113312, "learning_rate": 1.999999877114023e-05, "loss": 1.0797, "step": 197 }, { "epoch": 0.03, "grad_norm": 5.453312121763317, "learning_rate": 1.9999995084561225e-05, "loss": 1.0321, "step": 198 }, { "epoch": 0.03, "grad_norm": 10.309253158294316, "learning_rate": 1.9999988940263887e-05, "loss": 0.7624, "step": 199 }, { "epoch": 0.03, "grad_norm": 5.513880161766579, "learning_rate": 1.9999980338249726e-05, "loss": 1.1025, "step": 200 }, { "epoch": 0.03, "grad_norm": 5.668581887524097, "learning_rate": 1.999996927852086e-05, "loss": 1.1191, "step": 201 }, { "epoch": 0.03, "grad_norm": 5.402701531746498, "learning_rate": 1.9999955761080003e-05, "loss": 1.0652, "step": 202 }, { "epoch": 0.03, "grad_norm": 4.9881038307362, "learning_rate": 1.999993978593048e-05, "loss": 1.1601, "step": 203 }, { "epoch": 0.03, "grad_norm": 5.586499941037915, "learning_rate": 1.9999921353076216e-05, "loss": 1.1691, "step": 204 }, { "epoch": 0.03, "grad_norm": 5.46705629801981, "learning_rate": 1.9999900462521743e-05, "loss": 1.1723, "step": 205 }, { "epoch": 0.03, "grad_norm": 59.17034445169865, "learning_rate": 1.9999877114272194e-05, "loss": 0.9822, "step": 206 }, { "epoch": 0.03, "grad_norm": 6.176551551254107, "learning_rate": 1.9999851308333305e-05, "loss": 1.1339, "step": 207 }, { "epoch": 0.03, "grad_norm": 6.182693806023538, "learning_rate": 1.9999823044711424e-05, "loss": 0.9911, "step": 208 }, { "epoch": 0.03, "grad_norm": 4.95969238154806, "learning_rate": 1.9999792323413492e-05, "loss": 1.1375, "step": 209 }, { "epoch": 0.03, "grad_norm": 4.304652344445716, "learning_rate": 1.9999759144447062e-05, "loss": 1.0383, "step": 210 }, { "epoch": 0.03, "grad_norm": 4.736874041587823, "learning_rate": 1.9999723507820288e-05, "loss": 1.0764, "step": 211 }, { "epoch": 0.03, "grad_norm": 4.647646408322613, "learning_rate": 1.999968541354193e-05, "loss": 1.2264, "step": 212 }, { "epoch": 0.03, "grad_norm": 5.084198319034713, "learning_rate": 1.9999644861621346e-05, "loss": 1.1324, "step": 213 }, { "epoch": 0.03, "grad_norm": 4.938224249926442, "learning_rate": 1.9999601852068507e-05, "loss": 1.102, "step": 214 }, { "epoch": 0.03, "grad_norm": 6.063465444477739, "learning_rate": 1.9999556384893984e-05, "loss": 1.0721, "step": 215 }, { "epoch": 0.03, "grad_norm": 5.564512063889825, "learning_rate": 1.999950846010895e-05, "loss": 1.1503, "step": 216 }, { "epoch": 0.03, "grad_norm": 5.414783073934225, "learning_rate": 1.999945807772518e-05, "loss": 1.061, "step": 217 }, { "epoch": 0.03, "grad_norm": 5.011468038105515, "learning_rate": 1.999940523775506e-05, "loss": 1.1042, "step": 218 }, { "epoch": 0.03, "grad_norm": 4.833874683418772, "learning_rate": 1.999934994021158e-05, "loss": 0.9945, "step": 219 }, { "epoch": 0.03, "grad_norm": 4.799836412952987, "learning_rate": 1.9999292185108322e-05, "loss": 1.0803, "step": 220 }, { "epoch": 0.03, "grad_norm": 4.554987452781492, "learning_rate": 1.999923197245949e-05, "loss": 1.0829, "step": 221 }, { "epoch": 0.03, "grad_norm": 5.372674266383977, "learning_rate": 1.9999169302279874e-05, "loss": 1.1556, "step": 222 }, { "epoch": 0.03, "grad_norm": 5.657197518806744, "learning_rate": 1.9999104174584885e-05, "loss": 1.0696, "step": 223 }, { "epoch": 0.03, "grad_norm": 4.728835803379367, "learning_rate": 1.999903658939052e-05, "loss": 1.0844, "step": 224 }, { "epoch": 0.03, "grad_norm": 5.828075849629188, "learning_rate": 1.99989665467134e-05, "loss": 1.0699, "step": 225 }, { "epoch": 0.03, "grad_norm": 5.769915650459503, "learning_rate": 1.9998894046570735e-05, "loss": 1.1587, "step": 226 }, { "epoch": 0.03, "grad_norm": 6.069781347033537, "learning_rate": 1.9998819088980338e-05, "loss": 1.211, "step": 227 }, { "epoch": 0.03, "grad_norm": 5.27532701759076, "learning_rate": 1.999874167396064e-05, "loss": 1.1225, "step": 228 }, { "epoch": 0.04, "grad_norm": 5.972816909113727, "learning_rate": 1.999866180153066e-05, "loss": 1.2299, "step": 229 }, { "epoch": 0.04, "grad_norm": 12.328508278998827, "learning_rate": 1.9998579471710037e-05, "loss": 1.1446, "step": 230 }, { "epoch": 0.04, "grad_norm": 4.761237265785631, "learning_rate": 1.9998494684519e-05, "loss": 1.0745, "step": 231 }, { "epoch": 0.04, "grad_norm": 6.0094751510628495, "learning_rate": 1.9998407439978383e-05, "loss": 1.1848, "step": 232 }, { "epoch": 0.04, "grad_norm": 4.687867135519146, "learning_rate": 1.9998317738109638e-05, "loss": 1.1307, "step": 233 }, { "epoch": 0.04, "grad_norm": 5.373549487869426, "learning_rate": 1.99982255789348e-05, "loss": 1.1336, "step": 234 }, { "epoch": 0.04, "grad_norm": 4.491520583566882, "learning_rate": 1.999813096247653e-05, "loss": 1.1269, "step": 235 }, { "epoch": 0.04, "grad_norm": 4.5109900214204846, "learning_rate": 1.999803388875808e-05, "loss": 1.1576, "step": 236 }, { "epoch": 0.04, "grad_norm": 4.897322807826128, "learning_rate": 1.9997934357803302e-05, "loss": 1.1426, "step": 237 }, { "epoch": 0.04, "grad_norm": 4.737081073192212, "learning_rate": 1.999783236963666e-05, "loss": 0.9822, "step": 238 }, { "epoch": 0.04, "grad_norm": 507.51763352048624, "learning_rate": 1.9997727924283227e-05, "loss": 2.0016, "step": 239 }, { "epoch": 0.04, "grad_norm": 6.720695674654614, "learning_rate": 1.9997621021768663e-05, "loss": 1.0005, "step": 240 }, { "epoch": 0.04, "grad_norm": 5.22021102440788, "learning_rate": 1.9997511662119248e-05, "loss": 1.2467, "step": 241 }, { "epoch": 0.04, "grad_norm": 4.925981261258009, "learning_rate": 1.9997399845361853e-05, "loss": 1.1494, "step": 242 }, { "epoch": 0.04, "grad_norm": 6.326969030434556, "learning_rate": 1.9997285571523966e-05, "loss": 1.2098, "step": 243 }, { "epoch": 0.04, "grad_norm": 4.852488413814456, "learning_rate": 1.999716884063367e-05, "loss": 1.1044, "step": 244 }, { "epoch": 0.04, "grad_norm": 4.981439821590262, "learning_rate": 1.9997049652719655e-05, "loss": 1.1347, "step": 245 }, { "epoch": 0.04, "grad_norm": 298.8321545901624, "learning_rate": 1.999692800781121e-05, "loss": 0.9508, "step": 246 }, { "epoch": 0.04, "grad_norm": 5.030060046557593, "learning_rate": 1.9996803905938237e-05, "loss": 1.1103, "step": 247 }, { "epoch": 0.04, "grad_norm": 4.209425242514626, "learning_rate": 1.9996677347131237e-05, "loss": 1.0899, "step": 248 }, { "epoch": 0.04, "grad_norm": 4.786140168586079, "learning_rate": 1.999654833142131e-05, "loss": 1.2291, "step": 249 }, { "epoch": 0.04, "grad_norm": 4.675732962915219, "learning_rate": 1.9996416858840167e-05, "loss": 1.1652, "step": 250 }, { "epoch": 0.04, "grad_norm": 5.486052375807091, "learning_rate": 1.999628292942012e-05, "loss": 1.1669, "step": 251 }, { "epoch": 0.04, "grad_norm": 5.513586744808445, "learning_rate": 1.9996146543194086e-05, "loss": 1.2232, "step": 252 }, { "epoch": 0.04, "grad_norm": 5.482304872859721, "learning_rate": 1.9996007700195583e-05, "loss": 1.0857, "step": 253 }, { "epoch": 0.04, "grad_norm": 7.219060691629565, "learning_rate": 1.9995866400458736e-05, "loss": 1.0267, "step": 254 }, { "epoch": 0.04, "grad_norm": 4.629086694340683, "learning_rate": 1.9995722644018275e-05, "loss": 1.1984, "step": 255 }, { "epoch": 0.04, "grad_norm": 4.554770506006259, "learning_rate": 1.9995576430909526e-05, "loss": 1.1749, "step": 256 }, { "epoch": 0.04, "grad_norm": 4.486611373898943, "learning_rate": 1.9995427761168427e-05, "loss": 1.0657, "step": 257 }, { "epoch": 0.04, "grad_norm": 5.086051833263698, "learning_rate": 1.9995276634831518e-05, "loss": 1.1945, "step": 258 }, { "epoch": 0.04, "grad_norm": 4.784755615304702, "learning_rate": 1.9995123051935938e-05, "loss": 0.9962, "step": 259 }, { "epoch": 0.04, "grad_norm": 5.1527510227553535, "learning_rate": 1.9994967012519436e-05, "loss": 1.0401, "step": 260 }, { "epoch": 0.04, "grad_norm": 4.979116067663447, "learning_rate": 1.9994808516620362e-05, "loss": 0.9774, "step": 261 }, { "epoch": 0.04, "grad_norm": 4.845044200480525, "learning_rate": 1.999464756427767e-05, "loss": 1.0955, "step": 262 }, { "epoch": 0.04, "grad_norm": 4.388519751080092, "learning_rate": 1.999448415553092e-05, "loss": 1.0714, "step": 263 }, { "epoch": 0.04, "grad_norm": 4.667854781851994, "learning_rate": 1.9994318290420264e-05, "loss": 0.9685, "step": 264 }, { "epoch": 0.04, "grad_norm": 4.912765838925781, "learning_rate": 1.9994149968986477e-05, "loss": 1.2169, "step": 265 }, { "epoch": 0.04, "grad_norm": 4.323589806398268, "learning_rate": 1.9993979191270923e-05, "loss": 1.0595, "step": 266 }, { "epoch": 0.04, "grad_norm": 4.482357421422183, "learning_rate": 1.9993805957315575e-05, "loss": 1.1664, "step": 267 }, { "epoch": 0.04, "grad_norm": 5.221027555569462, "learning_rate": 1.999363026716301e-05, "loss": 1.1076, "step": 268 }, { "epoch": 0.04, "grad_norm": 16.703437262233603, "learning_rate": 1.9993452120856412e-05, "loss": 0.88, "step": 269 }, { "epoch": 0.04, "grad_norm": 7.941943116606574, "learning_rate": 1.9993271518439554e-05, "loss": 1.0686, "step": 270 }, { "epoch": 0.04, "grad_norm": 4.695548346288317, "learning_rate": 1.999308845995683e-05, "loss": 1.0999, "step": 271 }, { "epoch": 0.04, "grad_norm": 4.326582127278831, "learning_rate": 1.9992902945453227e-05, "loss": 1.1071, "step": 272 }, { "epoch": 0.04, "grad_norm": 4.5673615588610215, "learning_rate": 1.9992714974974344e-05, "loss": 1.1184, "step": 273 }, { "epoch": 0.04, "grad_norm": 5.379449041896806, "learning_rate": 1.9992524548566378e-05, "loss": 1.1919, "step": 274 }, { "epoch": 0.04, "grad_norm": 5.356848963709712, "learning_rate": 1.9992331666276126e-05, "loss": 1.0153, "step": 275 }, { "epoch": 0.04, "grad_norm": 5.7495735533750665, "learning_rate": 1.9992136328151e-05, "loss": 1.1424, "step": 276 }, { "epoch": 0.04, "grad_norm": 4.708712119538405, "learning_rate": 1.9991938534238996e-05, "loss": 1.0777, "step": 277 }, { "epoch": 0.04, "grad_norm": 4.623074644106911, "learning_rate": 1.9991738284588743e-05, "loss": 1.1632, "step": 278 }, { "epoch": 0.04, "grad_norm": 5.038994081163376, "learning_rate": 1.9991535579249443e-05, "loss": 1.1002, "step": 279 }, { "epoch": 0.04, "grad_norm": 4.4993011643736915, "learning_rate": 1.9991330418270926e-05, "loss": 1.1014, "step": 280 }, { "epoch": 0.04, "grad_norm": 4.312602735744396, "learning_rate": 1.9991122801703606e-05, "loss": 1.087, "step": 281 }, { "epoch": 0.04, "grad_norm": 281.9379233611627, "learning_rate": 1.9990912729598512e-05, "loss": 1.3675, "step": 282 }, { "epoch": 0.04, "grad_norm": 4.403901398769606, "learning_rate": 1.9990700202007276e-05, "loss": 0.9703, "step": 283 }, { "epoch": 0.04, "grad_norm": 5.718875253576995, "learning_rate": 1.999048521898213e-05, "loss": 1.0062, "step": 284 }, { "epoch": 0.04, "grad_norm": 4.383041046256885, "learning_rate": 1.999026778057591e-05, "loss": 1.0738, "step": 285 }, { "epoch": 0.04, "grad_norm": 4.662485194619379, "learning_rate": 1.999004788684206e-05, "loss": 1.1438, "step": 286 }, { "epoch": 0.04, "grad_norm": 4.353269823598559, "learning_rate": 1.9989825537834623e-05, "loss": 1.0382, "step": 287 }, { "epoch": 0.04, "grad_norm": 4.508331270093196, "learning_rate": 1.9989600733608235e-05, "loss": 1.0688, "step": 288 }, { "epoch": 0.04, "grad_norm": 15.046375086111272, "learning_rate": 1.9989373474218163e-05, "loss": 1.1265, "step": 289 }, { "epoch": 0.04, "grad_norm": 4.7962186931442705, "learning_rate": 1.998914375972025e-05, "loss": 1.146, "step": 290 }, { "epoch": 0.04, "grad_norm": 4.70566502248541, "learning_rate": 1.998891159017096e-05, "loss": 1.068, "step": 291 }, { "epoch": 0.04, "grad_norm": 80.8458009620087, "learning_rate": 1.998867696562735e-05, "loss": 1.1027, "step": 292 }, { "epoch": 0.04, "grad_norm": 4.554679405892336, "learning_rate": 1.9988439886147082e-05, "loss": 1.0465, "step": 293 }, { "epoch": 0.05, "grad_norm": 4.717994795659879, "learning_rate": 1.9988200351788425e-05, "loss": 1.125, "step": 294 }, { "epoch": 0.05, "grad_norm": 4.380389098649303, "learning_rate": 1.9987958362610255e-05, "loss": 0.9189, "step": 295 }, { "epoch": 0.05, "grad_norm": 5.012117873767208, "learning_rate": 1.9987713918672044e-05, "loss": 1.2106, "step": 296 }, { "epoch": 0.05, "grad_norm": 5.827671238106491, "learning_rate": 1.9987467020033865e-05, "loss": 1.1088, "step": 297 }, { "epoch": 0.05, "grad_norm": 4.102478908570774, "learning_rate": 1.99872176667564e-05, "loss": 1.0611, "step": 298 }, { "epoch": 0.05, "grad_norm": 4.919138180496703, "learning_rate": 1.9986965858900934e-05, "loss": 1.0139, "step": 299 }, { "epoch": 0.05, "grad_norm": 4.634896848034215, "learning_rate": 1.9986711596529356e-05, "loss": 1.198, "step": 300 }, { "epoch": 0.05, "grad_norm": 10.76351079430654, "learning_rate": 1.9986454879704157e-05, "loss": 1.208, "step": 301 }, { "epoch": 0.05, "grad_norm": 5.003244093064284, "learning_rate": 1.9986195708488428e-05, "loss": 1.1318, "step": 302 }, { "epoch": 0.05, "grad_norm": 4.3217012747656565, "learning_rate": 1.9985934082945868e-05, "loss": 1.0452, "step": 303 }, { "epoch": 0.05, "grad_norm": 4.800641119661177, "learning_rate": 1.9985670003140777e-05, "loss": 1.1481, "step": 304 }, { "epoch": 0.05, "grad_norm": 4.248394762455576, "learning_rate": 1.9985403469138056e-05, "loss": 1.098, "step": 305 }, { "epoch": 0.05, "grad_norm": 4.557215159093832, "learning_rate": 1.9985134481003213e-05, "loss": 1.1635, "step": 306 }, { "epoch": 0.05, "grad_norm": 4.411297001088896, "learning_rate": 1.9984863038802364e-05, "loss": 1.0967, "step": 307 }, { "epoch": 0.05, "grad_norm": 4.619947266168186, "learning_rate": 1.998458914260221e-05, "loss": 1.1012, "step": 308 }, { "epoch": 0.05, "grad_norm": 4.7941902957102895, "learning_rate": 1.9984312792470074e-05, "loss": 1.1977, "step": 309 }, { "epoch": 0.05, "grad_norm": 4.78262219409935, "learning_rate": 1.9984033988473874e-05, "loss": 1.1142, "step": 310 }, { "epoch": 0.05, "grad_norm": 4.6109737402679, "learning_rate": 1.9983752730682132e-05, "loss": 1.1662, "step": 311 }, { "epoch": 0.05, "grad_norm": 12.660705540197382, "learning_rate": 1.9983469019163976e-05, "loss": 1.1834, "step": 312 }, { "epoch": 0.05, "grad_norm": 4.477088184986276, "learning_rate": 1.998318285398913e-05, "loss": 1.1383, "step": 313 }, { "epoch": 0.05, "grad_norm": 4.494691763849153, "learning_rate": 1.998289423522793e-05, "loss": 1.1171, "step": 314 }, { "epoch": 0.05, "grad_norm": 4.583589314263017, "learning_rate": 1.9982603162951303e-05, "loss": 1.0342, "step": 315 }, { "epoch": 0.05, "grad_norm": 13.239258176680355, "learning_rate": 1.9982309637230796e-05, "loss": 1.0779, "step": 316 }, { "epoch": 0.05, "grad_norm": 4.533301393135521, "learning_rate": 1.9982013658138544e-05, "loss": 1.1758, "step": 317 }, { "epoch": 0.05, "grad_norm": 4.160943400606343, "learning_rate": 1.9981715225747287e-05, "loss": 0.993, "step": 318 }, { "epoch": 0.05, "grad_norm": 4.611711397681244, "learning_rate": 1.998141434013038e-05, "loss": 1.061, "step": 319 }, { "epoch": 0.05, "grad_norm": 4.98935758205428, "learning_rate": 1.9981111001361762e-05, "loss": 1.1703, "step": 320 }, { "epoch": 0.05, "grad_norm": 4.475805740319163, "learning_rate": 1.9980805209515995e-05, "loss": 1.1512, "step": 321 }, { "epoch": 0.05, "grad_norm": 4.976352036289246, "learning_rate": 1.9980496964668228e-05, "loss": 1.059, "step": 322 }, { "epoch": 0.05, "grad_norm": 4.779970748595774, "learning_rate": 1.998018626689422e-05, "loss": 1.1983, "step": 323 }, { "epoch": 0.05, "grad_norm": 15.610805708531752, "learning_rate": 1.9979873116270333e-05, "loss": 1.1178, "step": 324 }, { "epoch": 0.05, "grad_norm": 4.4942074564889785, "learning_rate": 1.997955751287353e-05, "loss": 1.0686, "step": 325 }, { "epoch": 0.05, "grad_norm": 4.349619273852159, "learning_rate": 1.997923945678138e-05, "loss": 1.1128, "step": 326 }, { "epoch": 0.05, "grad_norm": 102.78795406026858, "learning_rate": 1.9978918948072047e-05, "loss": 1.5781, "step": 327 }, { "epoch": 0.05, "grad_norm": 5.167255102561554, "learning_rate": 1.997859598682431e-05, "loss": 1.1255, "step": 328 }, { "epoch": 0.05, "grad_norm": 4.462766578218212, "learning_rate": 1.997827057311753e-05, "loss": 1.1399, "step": 329 }, { "epoch": 0.05, "grad_norm": 4.196999251460958, "learning_rate": 1.99779427070317e-05, "loss": 1.0407, "step": 330 }, { "epoch": 0.05, "grad_norm": 4.672166926366032, "learning_rate": 1.99776123886474e-05, "loss": 1.1284, "step": 331 }, { "epoch": 0.05, "grad_norm": 4.394708768001454, "learning_rate": 1.9977279618045798e-05, "loss": 1.1958, "step": 332 }, { "epoch": 0.05, "grad_norm": 4.585192132933774, "learning_rate": 1.9976944395308696e-05, "loss": 1.0985, "step": 333 }, { "epoch": 0.05, "grad_norm": 4.732639403301168, "learning_rate": 1.9976606720518474e-05, "loss": 0.9398, "step": 334 }, { "epoch": 0.05, "grad_norm": 4.385645024123697, "learning_rate": 1.9976266593758123e-05, "loss": 1.1773, "step": 335 }, { "epoch": 0.05, "grad_norm": 3.883495168249728, "learning_rate": 1.9975924015111243e-05, "loss": 0.9511, "step": 336 }, { "epoch": 0.05, "grad_norm": 4.378627617287336, "learning_rate": 1.9975578984662017e-05, "loss": 0.9816, "step": 337 }, { "epoch": 0.05, "grad_norm": 4.272296105323741, "learning_rate": 1.9975231502495255e-05, "loss": 1.0512, "step": 338 }, { "epoch": 0.05, "grad_norm": 5.1752536572488035, "learning_rate": 1.9974881568696358e-05, "loss": 1.1703, "step": 339 }, { "epoch": 0.05, "grad_norm": 4.280722914541011, "learning_rate": 1.997452918335133e-05, "loss": 1.1305, "step": 340 }, { "epoch": 0.05, "grad_norm": 5.659806010735759, "learning_rate": 1.9974174346546767e-05, "loss": 1.0883, "step": 341 }, { "epoch": 0.05, "grad_norm": 5.021467650646564, "learning_rate": 1.9973817058369892e-05, "loss": 1.21, "step": 342 }, { "epoch": 0.05, "grad_norm": 4.73234270501322, "learning_rate": 1.9973457318908508e-05, "loss": 0.982, "step": 343 }, { "epoch": 0.05, "grad_norm": 4.808261971142038, "learning_rate": 1.997309512825103e-05, "loss": 1.1985, "step": 344 }, { "epoch": 0.05, "grad_norm": 4.862801194088539, "learning_rate": 1.9972730486486476e-05, "loss": 1.158, "step": 345 }, { "epoch": 0.05, "grad_norm": 4.679447242215395, "learning_rate": 1.9972363393704466e-05, "loss": 1.1748, "step": 346 }, { "epoch": 0.05, "grad_norm": 4.0787881510678945, "learning_rate": 1.997199384999522e-05, "loss": 1.0843, "step": 347 }, { "epoch": 0.05, "grad_norm": 4.801558853286235, "learning_rate": 1.997162185544956e-05, "loss": 1.0041, "step": 348 }, { "epoch": 0.05, "grad_norm": 4.679054095707912, "learning_rate": 1.9971247410158908e-05, "loss": 0.972, "step": 349 }, { "epoch": 0.05, "grad_norm": 4.383388745457075, "learning_rate": 1.99708705142153e-05, "loss": 1.0952, "step": 350 }, { "epoch": 0.05, "grad_norm": 4.590760880880462, "learning_rate": 1.9970491167711365e-05, "loss": 1.0535, "step": 351 }, { "epoch": 0.05, "grad_norm": 4.253862279071871, "learning_rate": 1.9970109370740333e-05, "loss": 1.1829, "step": 352 }, { "epoch": 0.05, "grad_norm": 4.440583224131077, "learning_rate": 1.996972512339604e-05, "loss": 1.0307, "step": 353 }, { "epoch": 0.05, "grad_norm": 3.708308805584549, "learning_rate": 1.9969338425772918e-05, "loss": 1.1596, "step": 354 }, { "epoch": 0.05, "grad_norm": 3.982991930831451, "learning_rate": 1.996894927796602e-05, "loss": 1.0943, "step": 355 }, { "epoch": 0.05, "grad_norm": 4.5351066336760235, "learning_rate": 1.9968557680070972e-05, "loss": 1.2282, "step": 356 }, { "epoch": 0.05, "grad_norm": 4.088719223079842, "learning_rate": 1.996816363218403e-05, "loss": 1.018, "step": 357 }, { "epoch": 0.05, "grad_norm": 4.403197360776069, "learning_rate": 1.9967767134402033e-05, "loss": 1.0434, "step": 358 }, { "epoch": 0.05, "grad_norm": 4.3813635695825655, "learning_rate": 1.9967368186822428e-05, "loss": 0.9905, "step": 359 }, { "epoch": 0.06, "grad_norm": 4.229914743790464, "learning_rate": 1.996696678954327e-05, "loss": 1.1482, "step": 360 }, { "epoch": 0.06, "grad_norm": 4.770387275611607, "learning_rate": 1.996656294266321e-05, "loss": 1.1022, "step": 361 }, { "epoch": 0.06, "grad_norm": 4.874988333761614, "learning_rate": 1.9966156646281502e-05, "loss": 1.1424, "step": 362 }, { "epoch": 0.06, "grad_norm": 3.9995638240159233, "learning_rate": 1.9965747900498002e-05, "loss": 0.9644, "step": 363 }, { "epoch": 0.06, "grad_norm": 3.9612943598465606, "learning_rate": 1.9965336705413167e-05, "loss": 1.1147, "step": 364 }, { "epoch": 0.06, "grad_norm": 8.956813036420114, "learning_rate": 1.996492306112806e-05, "loss": 1.0985, "step": 365 }, { "epoch": 0.06, "grad_norm": 4.552523678043079, "learning_rate": 1.9964506967744336e-05, "loss": 1.0996, "step": 366 }, { "epoch": 0.06, "grad_norm": 4.438890757180122, "learning_rate": 1.9964088425364267e-05, "loss": 1.1752, "step": 367 }, { "epoch": 0.06, "grad_norm": 4.816508118861078, "learning_rate": 1.9963667434090717e-05, "loss": 1.104, "step": 368 }, { "epoch": 0.06, "grad_norm": 3.858878038488043, "learning_rate": 1.9963243994027157e-05, "loss": 1.1146, "step": 369 }, { "epoch": 0.06, "grad_norm": 8.368493036325697, "learning_rate": 1.9962818105277648e-05, "loss": 1.1827, "step": 370 }, { "epoch": 0.06, "grad_norm": 4.1246671758312585, "learning_rate": 1.9962389767946867e-05, "loss": 1.0241, "step": 371 }, { "epoch": 0.06, "grad_norm": 3.9237526131257954, "learning_rate": 1.9961958982140088e-05, "loss": 1.035, "step": 372 }, { "epoch": 0.06, "grad_norm": 3.9771041368400177, "learning_rate": 1.996152574796318e-05, "loss": 1.004, "step": 373 }, { "epoch": 0.06, "grad_norm": 5.265715900162166, "learning_rate": 1.996109006552263e-05, "loss": 1.1111, "step": 374 }, { "epoch": 0.06, "grad_norm": 4.32195360794682, "learning_rate": 1.9960651934925514e-05, "loss": 1.0701, "step": 375 }, { "epoch": 0.06, "grad_norm": 3.977376902921775, "learning_rate": 1.9960211356279502e-05, "loss": 1.0774, "step": 376 }, { "epoch": 0.06, "grad_norm": 4.4335843341897005, "learning_rate": 1.9959768329692884e-05, "loss": 1.1743, "step": 377 }, { "epoch": 0.06, "grad_norm": 3.873209849255563, "learning_rate": 1.995932285527455e-05, "loss": 1.0233, "step": 378 }, { "epoch": 0.06, "grad_norm": 4.401801021300256, "learning_rate": 1.995887493313397e-05, "loss": 1.1243, "step": 379 }, { "epoch": 0.06, "grad_norm": 4.386100362770559, "learning_rate": 1.9958424563381245e-05, "loss": 1.1078, "step": 380 }, { "epoch": 0.06, "grad_norm": 4.403668567877387, "learning_rate": 1.9957971746127052e-05, "loss": 1.0611, "step": 381 }, { "epoch": 0.06, "grad_norm": 4.496062950052071, "learning_rate": 1.9957516481482686e-05, "loss": 1.0897, "step": 382 }, { "epoch": 0.06, "grad_norm": 5.1495625285379605, "learning_rate": 1.9957058769560042e-05, "loss": 1.1743, "step": 383 }, { "epoch": 0.06, "grad_norm": 4.460766501417975, "learning_rate": 1.9956598610471603e-05, "loss": 1.1613, "step": 384 }, { "epoch": 0.06, "grad_norm": 4.389722614887968, "learning_rate": 1.995613600433047e-05, "loss": 1.0859, "step": 385 }, { "epoch": 0.06, "grad_norm": 4.576442531385308, "learning_rate": 1.995567095125034e-05, "loss": 1.0435, "step": 386 }, { "epoch": 0.06, "grad_norm": 4.237018164006678, "learning_rate": 1.9955203451345507e-05, "loss": 1.0716, "step": 387 }, { "epoch": 0.06, "grad_norm": 5.028493746861455, "learning_rate": 1.9954733504730868e-05, "loss": 1.0211, "step": 388 }, { "epoch": 0.06, "grad_norm": 4.362387612595332, "learning_rate": 1.9954261111521926e-05, "loss": 1.0603, "step": 389 }, { "epoch": 0.06, "grad_norm": 4.280742489164971, "learning_rate": 1.9953786271834777e-05, "loss": 1.0198, "step": 390 }, { "epoch": 0.06, "grad_norm": 4.343266350378013, "learning_rate": 1.9953308985786132e-05, "loss": 1.1851, "step": 391 }, { "epoch": 0.06, "grad_norm": 5.382126821153117, "learning_rate": 1.9952829253493287e-05, "loss": 1.1069, "step": 392 }, { "epoch": 0.06, "grad_norm": 4.4645047350214515, "learning_rate": 1.995234707507415e-05, "loss": 1.0897, "step": 393 }, { "epoch": 0.06, "grad_norm": 4.347303636479825, "learning_rate": 1.9951862450647227e-05, "loss": 1.0755, "step": 394 }, { "epoch": 0.06, "grad_norm": 6.687625133699968, "learning_rate": 1.9951375380331622e-05, "loss": 1.0315, "step": 395 }, { "epoch": 0.06, "grad_norm": 4.909428928532073, "learning_rate": 1.9950885864247043e-05, "loss": 1.1643, "step": 396 }, { "epoch": 0.06, "grad_norm": 4.578689500603283, "learning_rate": 1.9950393902513804e-05, "loss": 1.0898, "step": 397 }, { "epoch": 0.06, "grad_norm": 10.609108223587599, "learning_rate": 1.9949899495252816e-05, "loss": 1.0755, "step": 398 }, { "epoch": 0.06, "grad_norm": 4.841143079264003, "learning_rate": 1.9949402642585585e-05, "loss": 1.1456, "step": 399 }, { "epoch": 0.06, "grad_norm": 4.629729376883061, "learning_rate": 1.9948903344634223e-05, "loss": 1.065, "step": 400 }, { "epoch": 0.06, "grad_norm": 55.86395081501506, "learning_rate": 1.994840160152145e-05, "loss": 1.9193, "step": 401 }, { "epoch": 0.06, "grad_norm": 3.6236513738541767, "learning_rate": 1.9947897413370575e-05, "loss": 1.0813, "step": 402 }, { "epoch": 0.06, "grad_norm": 5.37260215681647, "learning_rate": 1.9947390780305515e-05, "loss": 1.2356, "step": 403 }, { "epoch": 0.06, "grad_norm": 4.394105492668787, "learning_rate": 1.9946881702450788e-05, "loss": 1.0607, "step": 404 }, { "epoch": 0.06, "grad_norm": 89.50489669809592, "learning_rate": 1.994637017993151e-05, "loss": 1.5628, "step": 405 }, { "epoch": 0.06, "grad_norm": 5.597609858089008, "learning_rate": 1.9945856212873394e-05, "loss": 1.0847, "step": 406 }, { "epoch": 0.06, "grad_norm": 4.459506128727871, "learning_rate": 1.9945339801402768e-05, "loss": 1.0432, "step": 407 }, { "epoch": 0.06, "grad_norm": 4.071685345913883, "learning_rate": 1.9944820945646543e-05, "loss": 1.0675, "step": 408 }, { "epoch": 0.06, "grad_norm": 4.231922265254759, "learning_rate": 1.9944299645732242e-05, "loss": 1.0479, "step": 409 }, { "epoch": 0.06, "grad_norm": 4.389508171925526, "learning_rate": 1.9943775901787988e-05, "loss": 1.0723, "step": 410 }, { "epoch": 0.06, "grad_norm": 4.503983891850023, "learning_rate": 1.99432497139425e-05, "loss": 1.0749, "step": 411 }, { "epoch": 0.06, "grad_norm": 4.152011570105329, "learning_rate": 1.9942721082325103e-05, "loss": 1.0135, "step": 412 }, { "epoch": 0.06, "grad_norm": 50.68316350833893, "learning_rate": 1.9942190007065715e-05, "loss": 1.1019, "step": 413 }, { "epoch": 0.06, "grad_norm": 5.500780906339276, "learning_rate": 1.9941656488294865e-05, "loss": 1.0915, "step": 414 }, { "epoch": 0.06, "grad_norm": 4.191607605165063, "learning_rate": 1.9941120526143673e-05, "loss": 1.0326, "step": 415 }, { "epoch": 0.06, "grad_norm": 4.330054099494727, "learning_rate": 1.9940582120743867e-05, "loss": 0.9911, "step": 416 }, { "epoch": 0.06, "grad_norm": 4.511469571369914, "learning_rate": 1.9940041272227767e-05, "loss": 1.1471, "step": 417 }, { "epoch": 0.06, "grad_norm": 4.329974316823594, "learning_rate": 1.9939497980728305e-05, "loss": 1.0064, "step": 418 }, { "epoch": 0.06, "grad_norm": 3.7911891744849937, "learning_rate": 1.9938952246378996e-05, "loss": 1.0258, "step": 419 }, { "epoch": 0.06, "grad_norm": 4.619262957851557, "learning_rate": 1.993840406931398e-05, "loss": 0.9322, "step": 420 }, { "epoch": 0.06, "grad_norm": 4.866106813938292, "learning_rate": 1.9937853449667975e-05, "loss": 1.0814, "step": 421 }, { "epoch": 0.06, "grad_norm": 4.454545061711105, "learning_rate": 1.993730038757631e-05, "loss": 1.1405, "step": 422 }, { "epoch": 0.06, "grad_norm": 4.344621327807707, "learning_rate": 1.993674488317491e-05, "loss": 1.202, "step": 423 }, { "epoch": 0.06, "grad_norm": 8.97729689010236, "learning_rate": 1.9936186936600307e-05, "loss": 1.1599, "step": 424 }, { "epoch": 0.07, "grad_norm": 15.863469621733676, "learning_rate": 1.9935626547989623e-05, "loss": 1.0807, "step": 425 }, { "epoch": 0.07, "grad_norm": 4.242651785752871, "learning_rate": 1.9935063717480587e-05, "loss": 1.0514, "step": 426 }, { "epoch": 0.07, "grad_norm": 6.455541990923886, "learning_rate": 1.9934498445211533e-05, "loss": 1.0635, "step": 427 }, { "epoch": 0.07, "grad_norm": 52.6839859691502, "learning_rate": 1.993393073132138e-05, "loss": 1.2474, "step": 428 }, { "epoch": 0.07, "grad_norm": 4.836687324373342, "learning_rate": 1.9933360575949666e-05, "loss": 0.9968, "step": 429 }, { "epoch": 0.07, "grad_norm": 4.629865348213475, "learning_rate": 1.993278797923651e-05, "loss": 1.1198, "step": 430 }, { "epoch": 0.07, "grad_norm": 4.358351299783933, "learning_rate": 1.9932212941322647e-05, "loss": 1.0978, "step": 431 }, { "epoch": 0.07, "grad_norm": 3.9377679799485334, "learning_rate": 1.99316354623494e-05, "loss": 1.0485, "step": 432 }, { "epoch": 0.07, "grad_norm": 4.6018038205612255, "learning_rate": 1.99310555424587e-05, "loss": 1.0514, "step": 433 }, { "epoch": 0.07, "grad_norm": 4.0440373728530785, "learning_rate": 1.9930473181793072e-05, "loss": 0.9412, "step": 434 }, { "epoch": 0.07, "grad_norm": 4.2822041977740986, "learning_rate": 1.992988838049565e-05, "loss": 0.9845, "step": 435 }, { "epoch": 0.07, "grad_norm": 4.3024294123951226, "learning_rate": 1.992930113871016e-05, "loss": 1.0778, "step": 436 }, { "epoch": 0.07, "grad_norm": 4.426499951361489, "learning_rate": 1.9928711456580924e-05, "loss": 1.0527, "step": 437 }, { "epoch": 0.07, "grad_norm": 3.97926425301124, "learning_rate": 1.9928119334252874e-05, "loss": 1.0938, "step": 438 }, { "epoch": 0.07, "grad_norm": 4.494487628691639, "learning_rate": 1.9927524771871537e-05, "loss": 1.0805, "step": 439 }, { "epoch": 0.07, "grad_norm": 4.1492409727708885, "learning_rate": 1.992692776958304e-05, "loss": 1.1888, "step": 440 }, { "epoch": 0.07, "grad_norm": 3.7536887124074765, "learning_rate": 1.9926328327534108e-05, "loss": 1.1237, "step": 441 }, { "epoch": 0.07, "grad_norm": 4.17207580577035, "learning_rate": 1.9925726445872064e-05, "loss": 1.117, "step": 442 }, { "epoch": 0.07, "grad_norm": 4.034264466789217, "learning_rate": 1.992512212474484e-05, "loss": 1.038, "step": 443 }, { "epoch": 0.07, "grad_norm": 5.935357931179911, "learning_rate": 1.992451536430096e-05, "loss": 0.9145, "step": 444 }, { "epoch": 0.07, "grad_norm": 4.188855247069045, "learning_rate": 1.9923906164689545e-05, "loss": 1.1691, "step": 445 }, { "epoch": 0.07, "grad_norm": 3.872423420867507, "learning_rate": 1.9923294526060318e-05, "loss": 0.9843, "step": 446 }, { "epoch": 0.07, "grad_norm": 4.043213930905514, "learning_rate": 1.992268044856361e-05, "loss": 1.2413, "step": 447 }, { "epoch": 0.07, "grad_norm": 3.8760649774844254, "learning_rate": 1.992206393235034e-05, "loss": 1.0825, "step": 448 }, { "epoch": 0.07, "grad_norm": 3.574510132496088, "learning_rate": 1.9921444977572026e-05, "loss": 1.0488, "step": 449 }, { "epoch": 0.07, "grad_norm": 9.872733544881923, "learning_rate": 1.9920823584380797e-05, "loss": 1.212, "step": 450 }, { "epoch": 0.07, "grad_norm": 15.738193513731524, "learning_rate": 1.992019975292937e-05, "loss": 1.2698, "step": 451 }, { "epoch": 0.07, "grad_norm": 4.1666626790638075, "learning_rate": 1.9919573483371065e-05, "loss": 1.0719, "step": 452 }, { "epoch": 0.07, "grad_norm": 4.152054735117916, "learning_rate": 1.9918944775859805e-05, "loss": 1.0218, "step": 453 }, { "epoch": 0.07, "grad_norm": 4.0873913265028285, "learning_rate": 1.9918313630550107e-05, "loss": 1.1434, "step": 454 }, { "epoch": 0.07, "grad_norm": 3.9934330107747416, "learning_rate": 1.9917680047597085e-05, "loss": 1.1273, "step": 455 }, { "epoch": 0.07, "grad_norm": 4.376873182361851, "learning_rate": 1.9917044027156462e-05, "loss": 1.06, "step": 456 }, { "epoch": 0.07, "grad_norm": 4.715172465931275, "learning_rate": 1.9916405569384548e-05, "loss": 1.0255, "step": 457 }, { "epoch": 0.07, "grad_norm": 68.34701161214348, "learning_rate": 1.9915764674438263e-05, "loss": 1.2339, "step": 458 }, { "epoch": 0.07, "grad_norm": 4.665587219676493, "learning_rate": 1.991512134247512e-05, "loss": 1.0084, "step": 459 }, { "epoch": 0.07, "grad_norm": 3.8415418746312766, "learning_rate": 1.9914475573653228e-05, "loss": 1.0346, "step": 460 }, { "epoch": 0.07, "grad_norm": 3.8246643408106213, "learning_rate": 1.9913827368131303e-05, "loss": 1.0004, "step": 461 }, { "epoch": 0.07, "grad_norm": 4.114748947891342, "learning_rate": 1.991317672606866e-05, "loss": 1.1898, "step": 462 }, { "epoch": 0.07, "grad_norm": 3.6777417567463417, "learning_rate": 1.9912523647625195e-05, "loss": 1.1035, "step": 463 }, { "epoch": 0.07, "grad_norm": 4.9174622373220895, "learning_rate": 1.991186813296143e-05, "loss": 0.9963, "step": 464 }, { "epoch": 0.07, "grad_norm": 4.6798659957889805, "learning_rate": 1.9911210182238462e-05, "loss": 1.1222, "step": 465 }, { "epoch": 0.07, "grad_norm": 3.879278324637952, "learning_rate": 1.9910549795618003e-05, "loss": 1.0055, "step": 466 }, { "epoch": 0.07, "grad_norm": 4.0401268558658145, "learning_rate": 1.9909886973262356e-05, "loss": 1.1659, "step": 467 }, { "epoch": 0.07, "grad_norm": 20.71395567573766, "learning_rate": 1.9909221715334428e-05, "loss": 1.1785, "step": 468 }, { "epoch": 0.07, "grad_norm": 3.9734760731086074, "learning_rate": 1.9908554021997715e-05, "loss": 1.0212, "step": 469 }, { "epoch": 0.07, "grad_norm": 5.3755313994964915, "learning_rate": 1.990788389341632e-05, "loss": 0.9178, "step": 470 }, { "epoch": 0.07, "grad_norm": 3.9667867009878366, "learning_rate": 1.990721132975494e-05, "loss": 1.139, "step": 471 }, { "epoch": 0.07, "grad_norm": 4.996757965018768, "learning_rate": 1.9906536331178873e-05, "loss": 1.0152, "step": 472 }, { "epoch": 0.07, "grad_norm": 4.065537265184598, "learning_rate": 1.9905858897854013e-05, "loss": 1.0876, "step": 473 }, { "epoch": 0.07, "grad_norm": 3.853734590158613, "learning_rate": 1.990517902994686e-05, "loss": 1.0335, "step": 474 }, { "epoch": 0.07, "grad_norm": 3.7530342271361223, "learning_rate": 1.9904496727624498e-05, "loss": 1.1245, "step": 475 }, { "epoch": 0.07, "grad_norm": 3.770253015681632, "learning_rate": 1.9903811991054628e-05, "loss": 1.0104, "step": 476 }, { "epoch": 0.07, "grad_norm": 3.957710365597872, "learning_rate": 1.990312482040553e-05, "loss": 1.0127, "step": 477 }, { "epoch": 0.07, "grad_norm": 3.9657426975966645, "learning_rate": 1.9902435215846096e-05, "loss": 1.1692, "step": 478 }, { "epoch": 0.07, "grad_norm": 3.962760636201044, "learning_rate": 1.9901743177545807e-05, "loss": 1.0969, "step": 479 }, { "epoch": 0.07, "grad_norm": 4.0883879467739215, "learning_rate": 1.9901048705674752e-05, "loss": 1.0422, "step": 480 }, { "epoch": 0.07, "grad_norm": 4.118434571260297, "learning_rate": 1.990035180040361e-05, "loss": 1.0369, "step": 481 }, { "epoch": 0.07, "grad_norm": 25.230164702697778, "learning_rate": 1.9899652461903662e-05, "loss": 1.0999, "step": 482 }, { "epoch": 0.07, "grad_norm": 11.068930128392779, "learning_rate": 1.9898950690346784e-05, "loss": 1.0602, "step": 483 }, { "epoch": 0.07, "grad_norm": 5.64858207036738, "learning_rate": 1.9898246485905456e-05, "loss": 1.238, "step": 484 }, { "epoch": 0.07, "grad_norm": 5.167629424192774, "learning_rate": 1.9897539848752743e-05, "loss": 0.9315, "step": 485 }, { "epoch": 0.07, "grad_norm": 4.325684939850164, "learning_rate": 1.9896830779062325e-05, "loss": 1.0613, "step": 486 }, { "epoch": 0.07, "grad_norm": 4.591801602541418, "learning_rate": 1.989611927700847e-05, "loss": 1.1534, "step": 487 }, { "epoch": 0.07, "grad_norm": 3.8731127863301777, "learning_rate": 1.9895405342766044e-05, "loss": 1.0965, "step": 488 }, { "epoch": 0.07, "grad_norm": 3.9256611935348227, "learning_rate": 1.989468897651051e-05, "loss": 1.0757, "step": 489 }, { "epoch": 0.08, "grad_norm": 3.842769298855828, "learning_rate": 1.9893970178417933e-05, "loss": 1.0648, "step": 490 }, { "epoch": 0.08, "grad_norm": 4.188248922725501, "learning_rate": 1.989324894866497e-05, "loss": 1.0819, "step": 491 }, { "epoch": 0.08, "grad_norm": 4.498270743406448, "learning_rate": 1.9892525287428885e-05, "loss": 1.0548, "step": 492 }, { "epoch": 0.08, "grad_norm": 4.064522194976723, "learning_rate": 1.989179919488753e-05, "loss": 1.177, "step": 493 }, { "epoch": 0.08, "grad_norm": 4.192200466678487, "learning_rate": 1.9891070671219358e-05, "loss": 1.0633, "step": 494 }, { "epoch": 0.08, "grad_norm": 3.6610634108607893, "learning_rate": 1.9890339716603424e-05, "loss": 1.0271, "step": 495 }, { "epoch": 0.08, "grad_norm": 4.19383464605963, "learning_rate": 1.988960633121937e-05, "loss": 1.1628, "step": 496 }, { "epoch": 0.08, "grad_norm": 4.324148856756533, "learning_rate": 1.9888870515247445e-05, "loss": 1.1725, "step": 497 }, { "epoch": 0.08, "grad_norm": 4.094678732832611, "learning_rate": 1.988813226886849e-05, "loss": 1.1152, "step": 498 }, { "epoch": 0.08, "grad_norm": 100.17236461049514, "learning_rate": 1.9887391592263947e-05, "loss": 1.713, "step": 499 }, { "epoch": 0.08, "grad_norm": 4.200236194076793, "learning_rate": 1.9886648485615852e-05, "loss": 1.0963, "step": 500 }, { "epoch": 0.08, "grad_norm": 4.654556285087075, "learning_rate": 1.9885902949106842e-05, "loss": 1.1179, "step": 501 }, { "epoch": 0.08, "grad_norm": 3.559411065207602, "learning_rate": 1.988515498292015e-05, "loss": 1.0149, "step": 502 }, { "epoch": 0.08, "grad_norm": 11.07254697409706, "learning_rate": 1.9884404587239597e-05, "loss": 1.0651, "step": 503 }, { "epoch": 0.08, "grad_norm": 3.8097851753577676, "learning_rate": 1.9883651762249618e-05, "loss": 0.968, "step": 504 }, { "epoch": 0.08, "grad_norm": 4.229524204117181, "learning_rate": 1.9882896508135236e-05, "loss": 1.102, "step": 505 }, { "epoch": 0.08, "grad_norm": 3.8740072573172974, "learning_rate": 1.9882138825082066e-05, "loss": 1.1369, "step": 506 }, { "epoch": 0.08, "grad_norm": 3.7317673445914985, "learning_rate": 1.9881378713276323e-05, "loss": 0.9885, "step": 507 }, { "epoch": 0.08, "grad_norm": 10.295999670061802, "learning_rate": 1.9880616172904833e-05, "loss": 1.0094, "step": 508 }, { "epoch": 0.08, "grad_norm": 4.281729889433247, "learning_rate": 1.9879851204154996e-05, "loss": 0.9946, "step": 509 }, { "epoch": 0.08, "grad_norm": 4.396280320607497, "learning_rate": 1.9879083807214827e-05, "loss": 1.0109, "step": 510 }, { "epoch": 0.08, "grad_norm": 4.20457733433576, "learning_rate": 1.9878313982272926e-05, "loss": 1.179, "step": 511 }, { "epoch": 0.08, "grad_norm": 4.0518090897332595, "learning_rate": 1.9877541729518496e-05, "loss": 1.1367, "step": 512 }, { "epoch": 0.08, "grad_norm": 3.8944019034517012, "learning_rate": 1.9876767049141334e-05, "loss": 1.0746, "step": 513 }, { "epoch": 0.08, "grad_norm": 3.7920745114516183, "learning_rate": 1.987598994133184e-05, "loss": 1.1664, "step": 514 }, { "epoch": 0.08, "grad_norm": 3.9722247307359373, "learning_rate": 1.9875210406280993e-05, "loss": 1.1303, "step": 515 }, { "epoch": 0.08, "grad_norm": 4.182258325736007, "learning_rate": 1.9874428444180395e-05, "loss": 0.9902, "step": 516 }, { "epoch": 0.08, "grad_norm": 5.592378225884718, "learning_rate": 1.987364405522222e-05, "loss": 1.1012, "step": 517 }, { "epoch": 0.08, "grad_norm": 4.105204210476816, "learning_rate": 1.9872857239599254e-05, "loss": 1.1137, "step": 518 }, { "epoch": 0.08, "grad_norm": 4.167332270422541, "learning_rate": 1.9872067997504873e-05, "loss": 1.1159, "step": 519 }, { "epoch": 0.08, "grad_norm": 13.08947117736944, "learning_rate": 1.9871276329133054e-05, "loss": 1.0862, "step": 520 }, { "epoch": 0.08, "grad_norm": 4.049584156774763, "learning_rate": 1.9870482234678362e-05, "loss": 1.023, "step": 521 }, { "epoch": 0.08, "grad_norm": 4.232270233273242, "learning_rate": 1.9869685714335966e-05, "loss": 1.0735, "step": 522 }, { "epoch": 0.08, "grad_norm": 4.2805838206307625, "learning_rate": 1.986888676830162e-05, "loss": 1.0401, "step": 523 }, { "epoch": 0.08, "grad_norm": 4.004363644777837, "learning_rate": 1.9868085396771696e-05, "loss": 1.0112, "step": 524 }, { "epoch": 0.08, "grad_norm": 4.1293441807832805, "learning_rate": 1.9867281599943143e-05, "loss": 1.0584, "step": 525 }, { "epoch": 0.08, "grad_norm": 4.060908520373464, "learning_rate": 1.986647537801351e-05, "loss": 0.9488, "step": 526 }, { "epoch": 0.08, "grad_norm": 5.247746303908252, "learning_rate": 1.9865666731180946e-05, "loss": 1.0041, "step": 527 }, { "epoch": 0.08, "grad_norm": 5.175237819303534, "learning_rate": 1.9864855659644188e-05, "loss": 1.0653, "step": 528 }, { "epoch": 0.08, "grad_norm": 3.7167967522781247, "learning_rate": 1.986404216360258e-05, "loss": 1.0688, "step": 529 }, { "epoch": 0.08, "grad_norm": 3.819542500669309, "learning_rate": 1.986322624325606e-05, "loss": 1.1037, "step": 530 }, { "epoch": 0.08, "grad_norm": 4.298779141385325, "learning_rate": 1.986240789880515e-05, "loss": 1.1797, "step": 531 }, { "epoch": 0.08, "grad_norm": 3.7973224850258624, "learning_rate": 1.9861587130450983e-05, "loss": 1.0882, "step": 532 }, { "epoch": 0.08, "grad_norm": 14.693961767865298, "learning_rate": 1.9860763938395276e-05, "loss": 1.0669, "step": 533 }, { "epoch": 0.08, "grad_norm": 4.07838118819138, "learning_rate": 1.985993832284035e-05, "loss": 1.1385, "step": 534 }, { "epoch": 0.08, "grad_norm": 4.39069818425914, "learning_rate": 1.9859110283989115e-05, "loss": 1.0543, "step": 535 }, { "epoch": 0.08, "grad_norm": 4.034497990877742, "learning_rate": 1.985827982204508e-05, "loss": 1.096, "step": 536 }, { "epoch": 0.08, "grad_norm": 3.960533593233808, "learning_rate": 1.9857446937212354e-05, "loss": 1.0496, "step": 537 }, { "epoch": 0.08, "grad_norm": 4.309808184319671, "learning_rate": 1.9856611629695632e-05, "loss": 0.996, "step": 538 }, { "epoch": 0.08, "grad_norm": 3.5147331155631987, "learning_rate": 1.985577389970021e-05, "loss": 1.0178, "step": 539 }, { "epoch": 0.08, "grad_norm": 3.7971287131584917, "learning_rate": 1.9854933747431978e-05, "loss": 1.0094, "step": 540 }, { "epoch": 0.08, "grad_norm": 4.386234386949602, "learning_rate": 1.9854091173097423e-05, "loss": 1.1386, "step": 541 }, { "epoch": 0.08, "grad_norm": 4.000079644424622, "learning_rate": 1.9853246176903628e-05, "loss": 0.9622, "step": 542 }, { "epoch": 0.08, "grad_norm": 4.339334686890175, "learning_rate": 1.9852398759058267e-05, "loss": 1.0296, "step": 543 }, { "epoch": 0.08, "grad_norm": 3.7739468015723268, "learning_rate": 1.985154891976961e-05, "loss": 1.0136, "step": 544 }, { "epoch": 0.08, "grad_norm": 4.0151575509057436, "learning_rate": 1.9850696659246527e-05, "loss": 1.1141, "step": 545 }, { "epoch": 0.08, "grad_norm": 4.124140334325582, "learning_rate": 1.984984197769848e-05, "loss": 1.068, "step": 546 }, { "epoch": 0.08, "grad_norm": 6.798960767229643, "learning_rate": 1.984898487533552e-05, "loss": 1.042, "step": 547 }, { "epoch": 0.08, "grad_norm": 3.88888530591446, "learning_rate": 1.9848125352368304e-05, "loss": 1.0092, "step": 548 }, { "epoch": 0.08, "grad_norm": 5.590651116963613, "learning_rate": 1.984726340900808e-05, "loss": 1.0699, "step": 549 }, { "epoch": 0.08, "grad_norm": 13.111484667737962, "learning_rate": 1.9846399045466683e-05, "loss": 1.1293, "step": 550 }, { "epoch": 0.08, "grad_norm": 7.1949741389531106, "learning_rate": 1.9845532261956556e-05, "loss": 0.9913, "step": 551 }, { "epoch": 0.08, "grad_norm": 3.577889355072513, "learning_rate": 1.984466305869073e-05, "loss": 0.9844, "step": 552 }, { "epoch": 0.08, "grad_norm": 3.9399427915413283, "learning_rate": 1.9843791435882823e-05, "loss": 1.1503, "step": 553 }, { "epoch": 0.08, "grad_norm": 4.8808408631804285, "learning_rate": 1.9842917393747063e-05, "loss": 1.0585, "step": 554 }, { "epoch": 0.08, "grad_norm": 8.148362947766365, "learning_rate": 1.984204093249826e-05, "loss": 1.0008, "step": 555 }, { "epoch": 0.09, "grad_norm": 4.022762411797265, "learning_rate": 1.984116205235183e-05, "loss": 1.0576, "step": 556 }, { "epoch": 0.09, "grad_norm": 3.7848552941502684, "learning_rate": 1.984028075352377e-05, "loss": 1.1343, "step": 557 }, { "epoch": 0.09, "grad_norm": 4.194440701612392, "learning_rate": 1.9839397036230683e-05, "loss": 1.1125, "step": 558 }, { "epoch": 0.09, "grad_norm": 3.8417025422559186, "learning_rate": 1.983851090068976e-05, "loss": 1.0576, "step": 559 }, { "epoch": 0.09, "grad_norm": 3.6424172482294517, "learning_rate": 1.983762234711879e-05, "loss": 1.0682, "step": 560 }, { "epoch": 0.09, "grad_norm": 3.8586575805191674, "learning_rate": 1.9836731375736152e-05, "loss": 1.14, "step": 561 }, { "epoch": 0.09, "grad_norm": 3.7510893528582527, "learning_rate": 1.9835837986760826e-05, "loss": 1.0344, "step": 562 }, { "epoch": 0.09, "grad_norm": 3.8841637756939287, "learning_rate": 1.983494218041238e-05, "loss": 1.0781, "step": 563 }, { "epoch": 0.09, "grad_norm": 3.638573793002325, "learning_rate": 1.9834043956910977e-05, "loss": 1.0021, "step": 564 }, { "epoch": 0.09, "grad_norm": 3.84261917367866, "learning_rate": 1.9833143316477373e-05, "loss": 1.0403, "step": 565 }, { "epoch": 0.09, "grad_norm": 3.99726497722348, "learning_rate": 1.9832240259332926e-05, "loss": 1.129, "step": 566 }, { "epoch": 0.09, "grad_norm": 4.287995225308431, "learning_rate": 1.9831334785699573e-05, "loss": 1.0588, "step": 567 }, { "epoch": 0.09, "grad_norm": 4.517804901348297, "learning_rate": 1.9830426895799863e-05, "loss": 1.0125, "step": 568 }, { "epoch": 0.09, "grad_norm": 4.016138201292742, "learning_rate": 1.9829516589856927e-05, "loss": 1.1118, "step": 569 }, { "epoch": 0.09, "grad_norm": 3.7553954379546117, "learning_rate": 1.9828603868094493e-05, "loss": 1.0246, "step": 570 }, { "epoch": 0.09, "grad_norm": 27.1244431360396, "learning_rate": 1.982768873073688e-05, "loss": 1.1834, "step": 571 }, { "epoch": 0.09, "grad_norm": 3.972960301159113, "learning_rate": 1.9826771178009004e-05, "loss": 0.9803, "step": 572 }, { "epoch": 0.09, "grad_norm": 4.135770025339726, "learning_rate": 1.9825851210136377e-05, "loss": 1.0974, "step": 573 }, { "epoch": 0.09, "grad_norm": 3.661648194074585, "learning_rate": 1.98249288273451e-05, "loss": 1.0345, "step": 574 }, { "epoch": 0.09, "grad_norm": 4.2506882343689805, "learning_rate": 1.9824004029861865e-05, "loss": 1.0352, "step": 575 }, { "epoch": 0.09, "grad_norm": 3.918479068734851, "learning_rate": 1.9823076817913965e-05, "loss": 1.0562, "step": 576 }, { "epoch": 0.09, "grad_norm": 3.8531954074705776, "learning_rate": 1.9822147191729282e-05, "loss": 1.1355, "step": 577 }, { "epoch": 0.09, "grad_norm": 5.093074093090599, "learning_rate": 1.9821215151536292e-05, "loss": 1.0082, "step": 578 }, { "epoch": 0.09, "grad_norm": 3.74784655442073, "learning_rate": 1.9820280697564062e-05, "loss": 1.0291, "step": 579 }, { "epoch": 0.09, "grad_norm": 3.8133856210226313, "learning_rate": 1.981934383004226e-05, "loss": 1.0908, "step": 580 }, { "epoch": 0.09, "grad_norm": 4.275051791622801, "learning_rate": 1.9818404549201134e-05, "loss": 1.116, "step": 581 }, { "epoch": 0.09, "grad_norm": 3.717147775719318, "learning_rate": 1.981746285527154e-05, "loss": 1.0581, "step": 582 }, { "epoch": 0.09, "grad_norm": 4.179106753877097, "learning_rate": 1.9816518748484918e-05, "loss": 1.1118, "step": 583 }, { "epoch": 0.09, "grad_norm": 3.4321594846881496, "learning_rate": 1.9815572229073302e-05, "loss": 1.1244, "step": 584 }, { "epoch": 0.09, "grad_norm": 44.024519397743994, "learning_rate": 1.9814623297269318e-05, "loss": 1.1965, "step": 585 }, { "epoch": 0.09, "grad_norm": 3.688970794697736, "learning_rate": 1.981367195330619e-05, "loss": 0.9591, "step": 586 }, { "epoch": 0.09, "grad_norm": 4.613809556224252, "learning_rate": 1.9812718197417732e-05, "loss": 1.0524, "step": 587 }, { "epoch": 0.09, "grad_norm": 19.4274900048794, "learning_rate": 1.981176202983835e-05, "loss": 1.0762, "step": 588 }, { "epoch": 0.09, "grad_norm": 5.2882461921580655, "learning_rate": 1.981080345080304e-05, "loss": 1.1226, "step": 589 }, { "epoch": 0.09, "grad_norm": 4.470081133562788, "learning_rate": 1.980984246054739e-05, "loss": 1.0775, "step": 590 }, { "epoch": 0.09, "grad_norm": 4.236591608309034, "learning_rate": 1.98088790593076e-05, "loss": 1.1254, "step": 591 }, { "epoch": 0.09, "grad_norm": 3.943827833626578, "learning_rate": 1.9807913247320437e-05, "loss": 1.105, "step": 592 }, { "epoch": 0.09, "grad_norm": 5.051124059877304, "learning_rate": 1.9806945024823268e-05, "loss": 1.0184, "step": 593 }, { "epoch": 0.09, "grad_norm": 3.8137043251275897, "learning_rate": 1.9805974392054057e-05, "loss": 1.0157, "step": 594 }, { "epoch": 0.09, "grad_norm": 3.8810926289689256, "learning_rate": 1.980500134925136e-05, "loss": 1.0248, "step": 595 }, { "epoch": 0.09, "grad_norm": 4.249114811226602, "learning_rate": 1.9804025896654323e-05, "loss": 1.0927, "step": 596 }, { "epoch": 0.09, "grad_norm": 3.937037416045328, "learning_rate": 1.9803048034502686e-05, "loss": 0.9526, "step": 597 }, { "epoch": 0.09, "grad_norm": 3.785040156544341, "learning_rate": 1.9802067763036777e-05, "loss": 1.0215, "step": 598 }, { "epoch": 0.09, "grad_norm": 3.8696537498290473, "learning_rate": 1.9801085082497524e-05, "loss": 1.0136, "step": 599 }, { "epoch": 0.09, "grad_norm": 4.042449573541756, "learning_rate": 1.980009999312644e-05, "loss": 0.9528, "step": 600 }, { "epoch": 0.09, "grad_norm": 3.901999977345569, "learning_rate": 1.979911249516563e-05, "loss": 1.0932, "step": 601 }, { "epoch": 0.09, "grad_norm": 3.9792433955931603, "learning_rate": 1.9798122588857792e-05, "loss": 1.0753, "step": 602 }, { "epoch": 0.09, "grad_norm": 3.855871910802278, "learning_rate": 1.9797130274446226e-05, "loss": 0.9931, "step": 603 }, { "epoch": 0.09, "grad_norm": 3.8407523106332966, "learning_rate": 1.9796135552174806e-05, "loss": 1.052, "step": 604 }, { "epoch": 0.09, "grad_norm": 3.6097273941283787, "learning_rate": 1.979513842228801e-05, "loss": 1.0838, "step": 605 }, { "epoch": 0.09, "grad_norm": 3.7398132484603246, "learning_rate": 1.9794138885030904e-05, "loss": 1.0803, "step": 606 }, { "epoch": 0.09, "grad_norm": 3.9933304582799956, "learning_rate": 1.979313694064915e-05, "loss": 1.0301, "step": 607 }, { "epoch": 0.09, "grad_norm": 3.8702263071544802, "learning_rate": 1.979213258938899e-05, "loss": 1.091, "step": 608 }, { "epoch": 0.09, "grad_norm": 4.532117228434824, "learning_rate": 1.9791125831497275e-05, "loss": 1.0777, "step": 609 }, { "epoch": 0.09, "grad_norm": 3.6877373194692176, "learning_rate": 1.979011666722143e-05, "loss": 1.0882, "step": 610 }, { "epoch": 0.09, "grad_norm": 3.6714087433404483, "learning_rate": 1.9789105096809486e-05, "loss": 1.0265, "step": 611 }, { "epoch": 0.09, "grad_norm": 4.253830373999391, "learning_rate": 1.9788091120510054e-05, "loss": 1.0392, "step": 612 }, { "epoch": 0.09, "grad_norm": 4.383486203412253, "learning_rate": 1.978707473857234e-05, "loss": 0.9853, "step": 613 }, { "epoch": 0.09, "grad_norm": 3.9207782444723676, "learning_rate": 1.9786055951246145e-05, "loss": 1.0359, "step": 614 }, { "epoch": 0.09, "grad_norm": 4.27299707690402, "learning_rate": 1.978503475878186e-05, "loss": 1.0598, "step": 615 }, { "epoch": 0.09, "grad_norm": 3.611938859351543, "learning_rate": 1.978401116143046e-05, "loss": 1.0947, "step": 616 }, { "epoch": 0.09, "grad_norm": 4.110523917481957, "learning_rate": 1.9782985159443516e-05, "loss": 1.0986, "step": 617 }, { "epoch": 0.09, "grad_norm": 3.9721702902162335, "learning_rate": 1.97819567530732e-05, "loss": 0.9685, "step": 618 }, { "epoch": 0.09, "grad_norm": 3.6766477960601947, "learning_rate": 1.9780925942572254e-05, "loss": 0.9494, "step": 619 }, { "epoch": 0.09, "grad_norm": 4.257434563692485, "learning_rate": 1.9779892728194033e-05, "loss": 1.1736, "step": 620 }, { "epoch": 0.1, "grad_norm": 3.7810009161622755, "learning_rate": 1.977885711019246e-05, "loss": 0.9732, "step": 621 }, { "epoch": 0.1, "grad_norm": 4.154657757370879, "learning_rate": 1.977781908882207e-05, "loss": 1.0125, "step": 622 }, { "epoch": 0.1, "grad_norm": 12.90341959750237, "learning_rate": 1.9776778664337983e-05, "loss": 1.0735, "step": 623 }, { "epoch": 0.1, "grad_norm": 4.029745128260756, "learning_rate": 1.97757358369959e-05, "loss": 1.0611, "step": 624 }, { "epoch": 0.1, "grad_norm": 3.6947993759190734, "learning_rate": 1.9774690607052113e-05, "loss": 0.9721, "step": 625 }, { "epoch": 0.1, "grad_norm": 3.5784447673937834, "learning_rate": 1.9773642974763518e-05, "loss": 1.0404, "step": 626 }, { "epoch": 0.1, "grad_norm": 4.071737235973688, "learning_rate": 1.9772592940387592e-05, "loss": 0.997, "step": 627 }, { "epoch": 0.1, "grad_norm": 3.618782283976266, "learning_rate": 1.9771540504182404e-05, "loss": 1.148, "step": 628 }, { "epoch": 0.1, "grad_norm": 3.3777647657574272, "learning_rate": 1.9770485666406612e-05, "loss": 1.0647, "step": 629 }, { "epoch": 0.1, "grad_norm": 3.811698216066833, "learning_rate": 1.976942842731947e-05, "loss": 1.1163, "step": 630 }, { "epoch": 0.1, "grad_norm": 3.8554878867521087, "learning_rate": 1.976836878718081e-05, "loss": 1.0056, "step": 631 }, { "epoch": 0.1, "grad_norm": 3.5595599483392077, "learning_rate": 1.9767306746251073e-05, "loss": 1.0362, "step": 632 }, { "epoch": 0.1, "grad_norm": 4.355698503630512, "learning_rate": 1.976624230479127e-05, "loss": 1.0318, "step": 633 }, { "epoch": 0.1, "grad_norm": 3.5800019812910033, "learning_rate": 1.976517546306301e-05, "loss": 1.143, "step": 634 }, { "epoch": 0.1, "grad_norm": 4.109303971927999, "learning_rate": 1.9764106221328495e-05, "loss": 1.1507, "step": 635 }, { "epoch": 0.1, "grad_norm": 11.341830505198706, "learning_rate": 1.976303457985052e-05, "loss": 0.9271, "step": 636 }, { "epoch": 0.1, "grad_norm": 6.158302554726382, "learning_rate": 1.9761960538892456e-05, "loss": 1.0866, "step": 637 }, { "epoch": 0.1, "grad_norm": 3.6323990269274447, "learning_rate": 1.9760884098718277e-05, "loss": 1.0624, "step": 638 }, { "epoch": 0.1, "grad_norm": 3.593221906283679, "learning_rate": 1.9759805259592543e-05, "loss": 0.9801, "step": 639 }, { "epoch": 0.1, "grad_norm": 3.8310072853709722, "learning_rate": 1.97587240217804e-05, "loss": 1.1142, "step": 640 }, { "epoch": 0.1, "grad_norm": 3.708340807762302, "learning_rate": 1.975764038554758e-05, "loss": 1.065, "step": 641 }, { "epoch": 0.1, "grad_norm": 3.602682523381259, "learning_rate": 1.9756554351160423e-05, "loss": 1.0641, "step": 642 }, { "epoch": 0.1, "grad_norm": 3.677081539155567, "learning_rate": 1.9755465918885836e-05, "loss": 1.0378, "step": 643 }, { "epoch": 0.1, "grad_norm": 3.7668871395570487, "learning_rate": 1.975437508899133e-05, "loss": 1.1344, "step": 644 }, { "epoch": 0.1, "grad_norm": 3.8359506126491563, "learning_rate": 1.9753281861745e-05, "loss": 0.9694, "step": 645 }, { "epoch": 0.1, "grad_norm": 4.206408482603931, "learning_rate": 1.9752186237415528e-05, "loss": 1.0533, "step": 646 }, { "epoch": 0.1, "grad_norm": 3.7597389829258656, "learning_rate": 1.9751088216272186e-05, "loss": 1.1411, "step": 647 }, { "epoch": 0.1, "grad_norm": 3.7120014142519246, "learning_rate": 1.9749987798584845e-05, "loss": 1.0662, "step": 648 }, { "epoch": 0.1, "grad_norm": 3.9987562421524383, "learning_rate": 1.9748884984623952e-05, "loss": 1.0139, "step": 649 }, { "epoch": 0.1, "grad_norm": 4.282466973540785, "learning_rate": 1.974777977466054e-05, "loss": 1.0623, "step": 650 }, { "epoch": 0.1, "grad_norm": 4.193144935671022, "learning_rate": 1.9746672168966253e-05, "loss": 1.0129, "step": 651 }, { "epoch": 0.1, "grad_norm": 3.481908768091186, "learning_rate": 1.9745562167813302e-05, "loss": 0.9982, "step": 652 }, { "epoch": 0.1, "grad_norm": 3.7044461584994797, "learning_rate": 1.9744449771474496e-05, "loss": 1.0352, "step": 653 }, { "epoch": 0.1, "grad_norm": 4.21153552187076, "learning_rate": 1.9743334980223224e-05, "loss": 0.9555, "step": 654 }, { "epoch": 0.1, "grad_norm": 3.597834834338504, "learning_rate": 1.9742217794333483e-05, "loss": 1.0555, "step": 655 }, { "epoch": 0.1, "grad_norm": 4.509560249870177, "learning_rate": 1.9741098214079833e-05, "loss": 1.122, "step": 656 }, { "epoch": 0.1, "grad_norm": 3.634147646970652, "learning_rate": 1.9739976239737444e-05, "loss": 1.1282, "step": 657 }, { "epoch": 0.1, "grad_norm": 3.9355859624871243, "learning_rate": 1.973885187158206e-05, "loss": 1.0385, "step": 658 }, { "epoch": 0.1, "grad_norm": 3.9223226174844936, "learning_rate": 1.9737725109890027e-05, "loss": 1.155, "step": 659 }, { "epoch": 0.1, "grad_norm": 3.839110455653833, "learning_rate": 1.9736595954938263e-05, "loss": 0.9182, "step": 660 }, { "epoch": 0.1, "grad_norm": 3.3843046936219263, "learning_rate": 1.973546440700429e-05, "loss": 1.0475, "step": 661 }, { "epoch": 0.1, "grad_norm": 3.95335162441481, "learning_rate": 1.9734330466366204e-05, "loss": 1.1071, "step": 662 }, { "epoch": 0.1, "grad_norm": 3.5083022418894405, "learning_rate": 1.97331941333027e-05, "loss": 1.0379, "step": 663 }, { "epoch": 0.1, "grad_norm": 12.503320132172616, "learning_rate": 1.9732055408093055e-05, "loss": 1.095, "step": 664 }, { "epoch": 0.1, "grad_norm": 3.8602096098013314, "learning_rate": 1.973091429101714e-05, "loss": 0.9738, "step": 665 }, { "epoch": 0.1, "grad_norm": 3.567515287128138, "learning_rate": 1.9729770782355402e-05, "loss": 1.0115, "step": 666 }, { "epoch": 0.1, "grad_norm": 4.1619498585102885, "learning_rate": 1.9728624882388887e-05, "loss": 1.113, "step": 667 }, { "epoch": 0.1, "grad_norm": 3.8403129128241567, "learning_rate": 1.972747659139923e-05, "loss": 1.0106, "step": 668 }, { "epoch": 0.1, "grad_norm": 3.6732666376119303, "learning_rate": 1.972632590966864e-05, "loss": 1.0863, "step": 669 }, { "epoch": 0.1, "grad_norm": 8.63342630749796, "learning_rate": 1.9725172837479926e-05, "loss": 0.9528, "step": 670 }, { "epoch": 0.1, "grad_norm": 27.87691370932522, "learning_rate": 1.9724017375116482e-05, "loss": 1.176, "step": 671 }, { "epoch": 0.1, "grad_norm": 3.683702726110297, "learning_rate": 1.972285952286229e-05, "loss": 1.1573, "step": 672 }, { "epoch": 0.1, "grad_norm": 3.617695052292551, "learning_rate": 1.972169928100191e-05, "loss": 1.0399, "step": 673 }, { "epoch": 0.1, "grad_norm": 4.297740360065807, "learning_rate": 1.9720536649820506e-05, "loss": 0.9832, "step": 674 }, { "epoch": 0.1, "grad_norm": 3.9857184734105227, "learning_rate": 1.9719371629603815e-05, "loss": 1.0523, "step": 675 }, { "epoch": 0.1, "grad_norm": 3.5057614162854858, "learning_rate": 1.9718204220638172e-05, "loss": 1.0747, "step": 676 }, { "epoch": 0.1, "grad_norm": 3.682742988164712, "learning_rate": 1.9717034423210482e-05, "loss": 1.1584, "step": 677 }, { "epoch": 0.1, "grad_norm": 3.903662120563867, "learning_rate": 1.9715862237608257e-05, "loss": 1.0778, "step": 678 }, { "epoch": 0.1, "grad_norm": 4.647491392542745, "learning_rate": 1.9714687664119588e-05, "loss": 1.0308, "step": 679 }, { "epoch": 0.1, "grad_norm": 3.981060186015297, "learning_rate": 1.9713510703033146e-05, "loss": 0.9786, "step": 680 }, { "epoch": 0.1, "grad_norm": 3.9494055759818574, "learning_rate": 1.97123313546382e-05, "loss": 1.1111, "step": 681 }, { "epoch": 0.1, "grad_norm": 3.7899296812945624, "learning_rate": 1.9711149619224604e-05, "loss": 1.0374, "step": 682 }, { "epoch": 0.1, "grad_norm": 3.871215861412619, "learning_rate": 1.9709965497082786e-05, "loss": 1.111, "step": 683 }, { "epoch": 0.1, "grad_norm": 4.67068255262732, "learning_rate": 1.9708778988503777e-05, "loss": 1.2062, "step": 684 }, { "epoch": 0.1, "grad_norm": 3.657131507652763, "learning_rate": 1.9707590093779185e-05, "loss": 1.0664, "step": 685 }, { "epoch": 0.11, "grad_norm": 3.8816693045054795, "learning_rate": 1.9706398813201207e-05, "loss": 0.9103, "step": 686 }, { "epoch": 0.11, "grad_norm": 3.9452131690989654, "learning_rate": 1.970520514706263e-05, "loss": 1.0652, "step": 687 }, { "epoch": 0.11, "grad_norm": 3.8074508879557953, "learning_rate": 1.9704009095656818e-05, "loss": 1.0803, "step": 688 }, { "epoch": 0.11, "grad_norm": 3.6545433112322137, "learning_rate": 1.9702810659277728e-05, "loss": 1.0225, "step": 689 }, { "epoch": 0.11, "grad_norm": 3.981129458125146, "learning_rate": 1.9701609838219907e-05, "loss": 0.9947, "step": 690 }, { "epoch": 0.11, "grad_norm": 3.9045146363435643, "learning_rate": 1.9700406632778476e-05, "loss": 1.0019, "step": 691 }, { "epoch": 0.11, "grad_norm": 4.32347733310266, "learning_rate": 1.9699201043249155e-05, "loss": 1.0846, "step": 692 }, { "epoch": 0.11, "grad_norm": 3.686360328821075, "learning_rate": 1.969799306992824e-05, "loss": 1.0297, "step": 693 }, { "epoch": 0.11, "grad_norm": 4.01988978049729, "learning_rate": 1.9696782713112622e-05, "loss": 1.0701, "step": 694 }, { "epoch": 0.11, "grad_norm": 3.8563763177269594, "learning_rate": 1.9695569973099765e-05, "loss": 1.0882, "step": 695 }, { "epoch": 0.11, "grad_norm": 3.937134848854293, "learning_rate": 1.9694354850187735e-05, "loss": 1.1373, "step": 696 }, { "epoch": 0.11, "grad_norm": 3.8809806630437658, "learning_rate": 1.969313734467517e-05, "loss": 1.1474, "step": 697 }, { "epoch": 0.11, "grad_norm": 3.6916313779988994, "learning_rate": 1.96919174568613e-05, "loss": 0.9033, "step": 698 }, { "epoch": 0.11, "grad_norm": 3.508260916496033, "learning_rate": 1.969069518704594e-05, "loss": 1.0931, "step": 699 }, { "epoch": 0.11, "grad_norm": 4.763611485514735, "learning_rate": 1.9689470535529486e-05, "loss": 0.9209, "step": 700 }, { "epoch": 0.11, "grad_norm": 4.03664208026575, "learning_rate": 1.968824350261293e-05, "loss": 0.9931, "step": 701 }, { "epoch": 0.11, "grad_norm": 3.4671068542228993, "learning_rate": 1.9687014088597835e-05, "loss": 1.1389, "step": 702 }, { "epoch": 0.11, "grad_norm": 3.6583966015737657, "learning_rate": 1.968578229378636e-05, "loss": 1.1128, "step": 703 }, { "epoch": 0.11, "grad_norm": 4.10644984836611, "learning_rate": 1.9684548118481243e-05, "loss": 1.0528, "step": 704 }, { "epoch": 0.11, "grad_norm": 3.777346025347817, "learning_rate": 1.9683311562985814e-05, "loss": 1.1407, "step": 705 }, { "epoch": 0.11, "grad_norm": 4.164046825464703, "learning_rate": 1.9682072627603983e-05, "loss": 1.0261, "step": 706 }, { "epoch": 0.11, "grad_norm": 3.8758635415084726, "learning_rate": 1.968083131264024e-05, "loss": 1.1707, "step": 707 }, { "epoch": 0.11, "grad_norm": 3.12795773688994, "learning_rate": 1.9679587618399674e-05, "loss": 0.9405, "step": 708 }, { "epoch": 0.11, "grad_norm": 4.441454785504363, "learning_rate": 1.9678341545187942e-05, "loss": 0.953, "step": 709 }, { "epoch": 0.11, "grad_norm": 3.834429816258187, "learning_rate": 1.96770930933113e-05, "loss": 1.0836, "step": 710 }, { "epoch": 0.11, "grad_norm": 3.631866186382932, "learning_rate": 1.9675842263076582e-05, "loss": 1.0692, "step": 711 }, { "epoch": 0.11, "grad_norm": 3.6349093277364863, "learning_rate": 1.96745890547912e-05, "loss": 1.0568, "step": 712 }, { "epoch": 0.11, "grad_norm": 3.989233135030243, "learning_rate": 1.967333346876316e-05, "loss": 0.9956, "step": 713 }, { "epoch": 0.11, "grad_norm": 3.5976223876570796, "learning_rate": 1.967207550530106e-05, "loss": 1.1034, "step": 714 }, { "epoch": 0.11, "grad_norm": 3.5445659972537817, "learning_rate": 1.967081516471406e-05, "loss": 0.9946, "step": 715 }, { "epoch": 0.11, "grad_norm": 3.962320605519689, "learning_rate": 1.9669552447311924e-05, "loss": 1.0553, "step": 716 }, { "epoch": 0.11, "grad_norm": 3.617516912860955, "learning_rate": 1.9668287353404985e-05, "loss": 1.0419, "step": 717 }, { "epoch": 0.11, "grad_norm": 3.9464756935533716, "learning_rate": 1.9667019883304174e-05, "loss": 1.0305, "step": 718 }, { "epoch": 0.11, "grad_norm": 4.000106682444183, "learning_rate": 1.9665750037320997e-05, "loss": 1.0838, "step": 719 }, { "epoch": 0.11, "grad_norm": 3.6435817212177857, "learning_rate": 1.9664477815767547e-05, "loss": 1.0185, "step": 720 }, { "epoch": 0.11, "grad_norm": 4.0570616965153805, "learning_rate": 1.96632032189565e-05, "loss": 1.1407, "step": 721 }, { "epoch": 0.11, "grad_norm": 4.486195508863955, "learning_rate": 1.9661926247201114e-05, "loss": 1.1585, "step": 722 }, { "epoch": 0.11, "grad_norm": 4.229993019271557, "learning_rate": 1.966064690081524e-05, "loss": 1.0752, "step": 723 }, { "epoch": 0.11, "grad_norm": 4.189231501099848, "learning_rate": 1.9659365180113297e-05, "loss": 1.0476, "step": 724 }, { "epoch": 0.11, "grad_norm": 3.7017992871809446, "learning_rate": 1.96580810854103e-05, "loss": 1.0926, "step": 725 }, { "epoch": 0.11, "grad_norm": 3.44021198429482, "learning_rate": 1.965679461702185e-05, "loss": 0.9926, "step": 726 }, { "epoch": 0.11, "grad_norm": 3.803164724153642, "learning_rate": 1.9655505775264112e-05, "loss": 1.1952, "step": 727 }, { "epoch": 0.11, "grad_norm": 3.7636211547252727, "learning_rate": 1.965421456045385e-05, "loss": 1.1308, "step": 728 }, { "epoch": 0.11, "grad_norm": 14.558419284935113, "learning_rate": 1.9652920972908417e-05, "loss": 1.2105, "step": 729 }, { "epoch": 0.11, "grad_norm": 3.620390642949841, "learning_rate": 1.9651625012945735e-05, "loss": 1.0653, "step": 730 }, { "epoch": 0.11, "grad_norm": 3.8772827825120344, "learning_rate": 1.9650326680884312e-05, "loss": 0.9776, "step": 731 }, { "epoch": 0.11, "grad_norm": 3.7144554851138603, "learning_rate": 1.9649025977043248e-05, "loss": 0.992, "step": 732 }, { "epoch": 0.11, "grad_norm": 3.7390114991851795, "learning_rate": 1.9647722901742214e-05, "loss": 0.9189, "step": 733 }, { "epoch": 0.11, "grad_norm": 4.052214849078998, "learning_rate": 1.964641745530147e-05, "loss": 1.033, "step": 734 }, { "epoch": 0.11, "grad_norm": 3.806759472316589, "learning_rate": 1.9645109638041863e-05, "loss": 1.0422, "step": 735 }, { "epoch": 0.11, "grad_norm": 38.73670037211717, "learning_rate": 1.964379945028481e-05, "loss": 1.2231, "step": 736 }, { "epoch": 0.11, "grad_norm": 3.5072756289465525, "learning_rate": 1.9642486892352326e-05, "loss": 1.0772, "step": 737 }, { "epoch": 0.11, "grad_norm": 3.7031172727863413, "learning_rate": 1.9641171964566993e-05, "loss": 1.1608, "step": 738 }, { "epoch": 0.11, "grad_norm": 3.8411867594705162, "learning_rate": 1.9639854667251995e-05, "loss": 1.0666, "step": 739 }, { "epoch": 0.11, "grad_norm": 3.8576674197679814, "learning_rate": 1.9638535000731075e-05, "loss": 1.1506, "step": 740 }, { "epoch": 0.11, "grad_norm": 4.2951006484863985, "learning_rate": 1.9637212965328574e-05, "loss": 1.0101, "step": 741 }, { "epoch": 0.11, "grad_norm": 3.3545177240029, "learning_rate": 1.9635888561369415e-05, "loss": 1.0713, "step": 742 }, { "epoch": 0.11, "grad_norm": 3.4714756904233024, "learning_rate": 1.9634561789179093e-05, "loss": 1.0043, "step": 743 }, { "epoch": 0.11, "grad_norm": 3.3900929988252564, "learning_rate": 1.9633232649083697e-05, "loss": 0.9865, "step": 744 }, { "epoch": 0.11, "grad_norm": 3.8653969256444016, "learning_rate": 1.9631901141409888e-05, "loss": 1.0011, "step": 745 }, { "epoch": 0.11, "grad_norm": 3.5304687792541385, "learning_rate": 1.9630567266484918e-05, "loss": 1.0675, "step": 746 }, { "epoch": 0.11, "grad_norm": 3.41936470088297, "learning_rate": 1.962923102463661e-05, "loss": 1.0268, "step": 747 }, { "epoch": 0.11, "grad_norm": 3.9386474133604734, "learning_rate": 1.962789241619338e-05, "loss": 0.9598, "step": 748 }, { "epoch": 0.11, "grad_norm": 3.7646981028998376, "learning_rate": 1.962655144148422e-05, "loss": 1.0279, "step": 749 }, { "epoch": 0.11, "grad_norm": 3.60321616972536, "learning_rate": 1.9625208100838698e-05, "loss": 1.0293, "step": 750 }, { "epoch": 0.11, "grad_norm": 3.524009445596224, "learning_rate": 1.9623862394586978e-05, "loss": 1.043, "step": 751 }, { "epoch": 0.12, "grad_norm": 3.749208823784576, "learning_rate": 1.9622514323059788e-05, "loss": 1.0363, "step": 752 }, { "epoch": 0.12, "grad_norm": 3.5714432297536622, "learning_rate": 1.9621163886588457e-05, "loss": 0.9095, "step": 753 }, { "epoch": 0.12, "grad_norm": 3.644928681020688, "learning_rate": 1.9619811085504877e-05, "loss": 1.1088, "step": 754 }, { "epoch": 0.12, "grad_norm": 3.5243190898327037, "learning_rate": 1.961845592014153e-05, "loss": 1.08, "step": 755 }, { "epoch": 0.12, "grad_norm": 3.5845755162216784, "learning_rate": 1.961709839083147e-05, "loss": 1.0601, "step": 756 }, { "epoch": 0.12, "grad_norm": 3.8386738684758286, "learning_rate": 1.9615738497908355e-05, "loss": 1.109, "step": 757 }, { "epoch": 0.12, "grad_norm": 3.4281897633822194, "learning_rate": 1.96143762417064e-05, "loss": 1.0573, "step": 758 }, { "epoch": 0.12, "grad_norm": 3.9133448707630483, "learning_rate": 1.961301162256041e-05, "loss": 1.0018, "step": 759 }, { "epoch": 0.12, "grad_norm": 3.623091036318239, "learning_rate": 1.961164464080577e-05, "loss": 1.0683, "step": 760 }, { "epoch": 0.12, "grad_norm": 3.801994531879226, "learning_rate": 1.9610275296778443e-05, "loss": 1.0179, "step": 761 }, { "epoch": 0.12, "grad_norm": 3.742948802159206, "learning_rate": 1.960890359081498e-05, "loss": 0.9554, "step": 762 }, { "epoch": 0.12, "grad_norm": 3.6508770514039557, "learning_rate": 1.9607529523252507e-05, "loss": 0.9559, "step": 763 }, { "epoch": 0.12, "grad_norm": 4.021960305430146, "learning_rate": 1.960615309442873e-05, "loss": 1.0517, "step": 764 }, { "epoch": 0.12, "grad_norm": 3.593832974448563, "learning_rate": 1.9604774304681937e-05, "loss": 1.1218, "step": 765 }, { "epoch": 0.12, "grad_norm": 3.624834274865465, "learning_rate": 1.9603393154350996e-05, "loss": 0.9769, "step": 766 }, { "epoch": 0.12, "grad_norm": 6.1752990912282915, "learning_rate": 1.960200964377535e-05, "loss": 1.1958, "step": 767 }, { "epoch": 0.12, "grad_norm": 3.8413333660780666, "learning_rate": 1.960062377329504e-05, "loss": 0.9992, "step": 768 }, { "epoch": 0.12, "grad_norm": 4.261820789196503, "learning_rate": 1.9599235543250662e-05, "loss": 1.0441, "step": 769 }, { "epoch": 0.12, "grad_norm": 4.1863418239141374, "learning_rate": 1.959784495398341e-05, "loss": 1.1747, "step": 770 }, { "epoch": 0.12, "grad_norm": 3.6590672543008886, "learning_rate": 1.959645200583505e-05, "loss": 1.0349, "step": 771 }, { "epoch": 0.12, "grad_norm": 3.838377478601075, "learning_rate": 1.959505669914793e-05, "loss": 1.1176, "step": 772 }, { "epoch": 0.12, "grad_norm": 3.7519830934354257, "learning_rate": 1.9593659034264973e-05, "loss": 1.0497, "step": 773 }, { "epoch": 0.12, "grad_norm": 3.980442885458292, "learning_rate": 1.9592259011529694e-05, "loss": 1.0741, "step": 774 }, { "epoch": 0.12, "grad_norm": 4.041357306903657, "learning_rate": 1.9590856631286173e-05, "loss": 0.9991, "step": 775 }, { "epoch": 0.12, "grad_norm": 3.719310496292942, "learning_rate": 1.9589451893879075e-05, "loss": 1.0681, "step": 776 }, { "epoch": 0.12, "grad_norm": 3.648060644724543, "learning_rate": 1.958804479965365e-05, "loss": 1.0148, "step": 777 }, { "epoch": 0.12, "grad_norm": 4.385002892701598, "learning_rate": 1.9586635348955726e-05, "loss": 1.122, "step": 778 }, { "epoch": 0.12, "grad_norm": 3.288261943500257, "learning_rate": 1.9585223542131694e-05, "loss": 1.0396, "step": 779 }, { "epoch": 0.12, "grad_norm": 3.462958609822878, "learning_rate": 1.9583809379528543e-05, "loss": 0.9797, "step": 780 }, { "epoch": 0.12, "grad_norm": 3.405964285120037, "learning_rate": 1.9582392861493835e-05, "loss": 1.0695, "step": 781 }, { "epoch": 0.12, "grad_norm": 3.6432920238058637, "learning_rate": 1.958097398837571e-05, "loss": 0.9781, "step": 782 }, { "epoch": 0.12, "grad_norm": 3.481847514892652, "learning_rate": 1.9579552760522887e-05, "loss": 1.0203, "step": 783 }, { "epoch": 0.12, "grad_norm": 4.578362002892191, "learning_rate": 1.9578129178284664e-05, "loss": 1.0154, "step": 784 }, { "epoch": 0.12, "grad_norm": 4.002478093346479, "learning_rate": 1.9576703242010923e-05, "loss": 1.1206, "step": 785 }, { "epoch": 0.12, "grad_norm": 3.9881379135003465, "learning_rate": 1.9575274952052105e-05, "loss": 1.0538, "step": 786 }, { "epoch": 0.12, "grad_norm": 4.081683690181119, "learning_rate": 1.957384430875926e-05, "loss": 0.9941, "step": 787 }, { "epoch": 0.12, "grad_norm": 3.7175468212827343, "learning_rate": 1.957241131248399e-05, "loss": 1.0754, "step": 788 }, { "epoch": 0.12, "grad_norm": 5.534881287893714, "learning_rate": 1.9570975963578485e-05, "loss": 0.9715, "step": 789 }, { "epoch": 0.12, "grad_norm": 3.432903947303448, "learning_rate": 1.9569538262395517e-05, "loss": 0.9574, "step": 790 }, { "epoch": 0.12, "grad_norm": 9.796886352729334, "learning_rate": 1.9568098209288436e-05, "loss": 1.1519, "step": 791 }, { "epoch": 0.12, "grad_norm": 3.906061714461183, "learning_rate": 1.9566655804611156e-05, "loss": 1.0291, "step": 792 }, { "epoch": 0.12, "grad_norm": 4.770470886347632, "learning_rate": 1.956521104871819e-05, "loss": 1.2086, "step": 793 }, { "epoch": 0.12, "grad_norm": 4.223319458578048, "learning_rate": 1.9563763941964615e-05, "loss": 1.0458, "step": 794 }, { "epoch": 0.12, "grad_norm": 3.9765422553961254, "learning_rate": 1.956231448470609e-05, "loss": 1.0013, "step": 795 }, { "epoch": 0.12, "grad_norm": 3.7688427996518112, "learning_rate": 1.9560862677298848e-05, "loss": 1.0523, "step": 796 }, { "epoch": 0.12, "grad_norm": 3.944295892478544, "learning_rate": 1.9559408520099703e-05, "loss": 0.9943, "step": 797 }, { "epoch": 0.12, "grad_norm": 10.285120031612841, "learning_rate": 1.955795201346605e-05, "loss": 1.07, "step": 798 }, { "epoch": 0.12, "grad_norm": 3.360886095298879, "learning_rate": 1.955649315775585e-05, "loss": 1.0184, "step": 799 }, { "epoch": 0.12, "grad_norm": 3.453504735902985, "learning_rate": 1.955503195332766e-05, "loss": 1.0194, "step": 800 }, { "epoch": 0.12, "grad_norm": 3.7720666704980377, "learning_rate": 1.9553568400540594e-05, "loss": 1.1322, "step": 801 }, { "epoch": 0.12, "grad_norm": 4.250064309135509, "learning_rate": 1.9552102499754356e-05, "loss": 1.048, "step": 802 }, { "epoch": 0.12, "grad_norm": 4.151007308172151, "learning_rate": 1.955063425132922e-05, "loss": 1.0671, "step": 803 }, { "epoch": 0.12, "grad_norm": 4.504712656197753, "learning_rate": 1.9549163655626044e-05, "loss": 0.9555, "step": 804 }, { "epoch": 0.12, "grad_norm": 3.9652043005001696, "learning_rate": 1.954769071300626e-05, "loss": 1.1281, "step": 805 }, { "epoch": 0.12, "grad_norm": 3.417589770846741, "learning_rate": 1.954621542383187e-05, "loss": 0.9591, "step": 806 }, { "epoch": 0.12, "grad_norm": 3.1455662258661916, "learning_rate": 1.9544737788465465e-05, "loss": 0.9198, "step": 807 }, { "epoch": 0.12, "grad_norm": 3.371944981568343, "learning_rate": 1.9543257807270204e-05, "loss": 1.0036, "step": 808 }, { "epoch": 0.12, "grad_norm": 3.6233229149796298, "learning_rate": 1.9541775480609823e-05, "loss": 1.0705, "step": 809 }, { "epoch": 0.12, "grad_norm": 3.7455642763851267, "learning_rate": 1.9540290808848637e-05, "loss": 1.0605, "step": 810 }, { "epoch": 0.12, "grad_norm": 3.67169620521609, "learning_rate": 1.953880379235154e-05, "loss": 1.0644, "step": 811 }, { "epoch": 0.12, "grad_norm": 3.563725907642809, "learning_rate": 1.9537314431483996e-05, "loss": 1.0724, "step": 812 }, { "epoch": 0.12, "grad_norm": 3.6587065592888997, "learning_rate": 1.953582272661205e-05, "loss": 1.0925, "step": 813 }, { "epoch": 0.12, "grad_norm": 4.1180071058625725, "learning_rate": 1.9534328678102316e-05, "loss": 1.1036, "step": 814 }, { "epoch": 0.12, "grad_norm": 4.189957751724557, "learning_rate": 1.9532832286321996e-05, "loss": 1.0334, "step": 815 }, { "epoch": 0.12, "grad_norm": 3.4550391618359204, "learning_rate": 1.9531333551638858e-05, "loss": 1.035, "step": 816 }, { "epoch": 0.13, "grad_norm": 3.4523104004446936, "learning_rate": 1.952983247442125e-05, "loss": 1.1495, "step": 817 }, { "epoch": 0.13, "grad_norm": 3.324773915920645, "learning_rate": 1.9528329055038094e-05, "loss": 1.0231, "step": 818 }, { "epoch": 0.13, "grad_norm": 3.779739514408404, "learning_rate": 1.9526823293858888e-05, "loss": 1.0396, "step": 819 }, { "epoch": 0.13, "grad_norm": 3.5490924567379984, "learning_rate": 1.9525315191253706e-05, "loss": 1.0161, "step": 820 }, { "epoch": 0.13, "grad_norm": 3.455144063503647, "learning_rate": 1.95238047475932e-05, "loss": 1.0069, "step": 821 }, { "epoch": 0.13, "grad_norm": 3.790835644184474, "learning_rate": 1.9522291963248588e-05, "loss": 1.0302, "step": 822 }, { "epoch": 0.13, "grad_norm": 3.411888232632968, "learning_rate": 1.9520776838591673e-05, "loss": 0.993, "step": 823 }, { "epoch": 0.13, "grad_norm": 3.606694666212379, "learning_rate": 1.9519259373994834e-05, "loss": 0.9957, "step": 824 }, { "epoch": 0.13, "grad_norm": 10.792601477958348, "learning_rate": 1.951773956983102e-05, "loss": 1.1405, "step": 825 }, { "epoch": 0.13, "grad_norm": 3.8188365519437704, "learning_rate": 1.951621742647375e-05, "loss": 1.2124, "step": 826 }, { "epoch": 0.13, "grad_norm": 3.6597773196242165, "learning_rate": 1.9514692944297137e-05, "loss": 1.1435, "step": 827 }, { "epoch": 0.13, "grad_norm": 3.194410296347501, "learning_rate": 1.9513166123675838e-05, "loss": 1.0048, "step": 828 }, { "epoch": 0.13, "grad_norm": 3.582744555272188, "learning_rate": 1.951163696498512e-05, "loss": 0.9583, "step": 829 }, { "epoch": 0.13, "grad_norm": 3.6207705927160245, "learning_rate": 1.9510105468600797e-05, "loss": 1.0335, "step": 830 }, { "epoch": 0.13, "grad_norm": 3.6278228482113617, "learning_rate": 1.950857163489927e-05, "loss": 1.1164, "step": 831 }, { "epoch": 0.13, "grad_norm": 3.5307035817039605, "learning_rate": 1.9507035464257515e-05, "loss": 0.9872, "step": 832 }, { "epoch": 0.13, "grad_norm": 3.824472225302144, "learning_rate": 1.9505496957053078e-05, "loss": 1.0227, "step": 833 }, { "epoch": 0.13, "grad_norm": 3.9818563480117914, "learning_rate": 1.9503956113664078e-05, "loss": 1.1242, "step": 834 }, { "epoch": 0.13, "grad_norm": 3.869625239367482, "learning_rate": 1.9502412934469215e-05, "loss": 1.0929, "step": 835 }, { "epoch": 0.13, "grad_norm": 3.2976612309343882, "learning_rate": 1.950086741984776e-05, "loss": 1.0272, "step": 836 }, { "epoch": 0.13, "grad_norm": 12.769326972793142, "learning_rate": 1.949931957017955e-05, "loss": 1.1102, "step": 837 }, { "epoch": 0.13, "grad_norm": 3.8481939837470174, "learning_rate": 1.9497769385845012e-05, "loss": 1.025, "step": 838 }, { "epoch": 0.13, "grad_norm": 3.3352628206516677, "learning_rate": 1.9496216867225134e-05, "loss": 1.0924, "step": 839 }, { "epoch": 0.13, "grad_norm": 3.3533347409436707, "learning_rate": 1.949466201470148e-05, "loss": 1.0003, "step": 840 }, { "epoch": 0.13, "grad_norm": 3.542021472164938, "learning_rate": 1.9493104828656187e-05, "loss": 1.1117, "step": 841 }, { "epoch": 0.13, "grad_norm": 3.6301160736140052, "learning_rate": 1.949154530947198e-05, "loss": 1.0294, "step": 842 }, { "epoch": 0.13, "grad_norm": 3.3460425083645564, "learning_rate": 1.948998345753213e-05, "loss": 1.0436, "step": 843 }, { "epoch": 0.13, "grad_norm": 3.2446259615251285, "learning_rate": 1.9488419273220503e-05, "loss": 0.9428, "step": 844 }, { "epoch": 0.13, "grad_norm": 3.310632804542017, "learning_rate": 1.9486852756921534e-05, "loss": 1.0236, "step": 845 }, { "epoch": 0.13, "grad_norm": 4.364801744366941, "learning_rate": 1.9485283909020226e-05, "loss": 1.0437, "step": 846 }, { "epoch": 0.13, "grad_norm": 3.2934837675991124, "learning_rate": 1.9483712729902152e-05, "loss": 1.0043, "step": 847 }, { "epoch": 0.13, "grad_norm": 3.6795888850025262, "learning_rate": 1.9482139219953478e-05, "loss": 1.0745, "step": 848 }, { "epoch": 0.13, "grad_norm": 3.411422407071628, "learning_rate": 1.9480563379560915e-05, "loss": 0.934, "step": 849 }, { "epoch": 0.13, "grad_norm": 3.460071140538643, "learning_rate": 1.9478985209111767e-05, "loss": 1.0779, "step": 850 }, { "epoch": 0.13, "grad_norm": 3.380162168990996, "learning_rate": 1.9477404708993904e-05, "loss": 0.9893, "step": 851 }, { "epoch": 0.13, "grad_norm": 3.2651632117157217, "learning_rate": 1.9475821879595765e-05, "loss": 0.9183, "step": 852 }, { "epoch": 0.13, "grad_norm": 3.2452134078511325, "learning_rate": 1.947423672130637e-05, "loss": 1.0649, "step": 853 }, { "epoch": 0.13, "grad_norm": 3.754485555340307, "learning_rate": 1.9472649234515303e-05, "loss": 1.2328, "step": 854 }, { "epoch": 0.13, "grad_norm": 3.3907456912187404, "learning_rate": 1.9471059419612724e-05, "loss": 1.1444, "step": 855 }, { "epoch": 0.13, "grad_norm": 4.104580673184325, "learning_rate": 1.9469467276989366e-05, "loss": 0.8636, "step": 856 }, { "epoch": 0.13, "grad_norm": 3.5082479856079214, "learning_rate": 1.946787280703653e-05, "loss": 0.9407, "step": 857 }, { "epoch": 0.13, "grad_norm": 4.222298799756904, "learning_rate": 1.9466276010146097e-05, "loss": 1.1906, "step": 858 }, { "epoch": 0.13, "grad_norm": 3.4676251777168177, "learning_rate": 1.9464676886710513e-05, "loss": 0.9284, "step": 859 }, { "epoch": 0.13, "grad_norm": 3.748250993913235, "learning_rate": 1.9463075437122792e-05, "loss": 1.0374, "step": 860 }, { "epoch": 0.13, "grad_norm": 3.5471276859661747, "learning_rate": 1.9461471661776536e-05, "loss": 1.105, "step": 861 }, { "epoch": 0.13, "grad_norm": 3.8740533564053607, "learning_rate": 1.94598655610659e-05, "loss": 0.9877, "step": 862 }, { "epoch": 0.13, "grad_norm": 3.680125228295212, "learning_rate": 1.945825713538562e-05, "loss": 1.1043, "step": 863 }, { "epoch": 0.13, "grad_norm": 4.148577769562295, "learning_rate": 1.9456646385131006e-05, "loss": 1.0213, "step": 864 }, { "epoch": 0.13, "grad_norm": 9.574910256683985, "learning_rate": 1.945503331069793e-05, "loss": 1.1023, "step": 865 }, { "epoch": 0.13, "grad_norm": 3.8228224803141773, "learning_rate": 1.9453417912482843e-05, "loss": 1.0985, "step": 866 }, { "epoch": 0.13, "grad_norm": 3.7000581035892046, "learning_rate": 1.9451800190882767e-05, "loss": 1.0697, "step": 867 }, { "epoch": 0.13, "grad_norm": 3.529672076487252, "learning_rate": 1.9450180146295286e-05, "loss": 0.8486, "step": 868 }, { "epoch": 0.13, "grad_norm": 3.5757251881192853, "learning_rate": 1.944855777911857e-05, "loss": 1.1062, "step": 869 }, { "epoch": 0.13, "grad_norm": 3.895084514836089, "learning_rate": 1.9446933089751342e-05, "loss": 1.0017, "step": 870 }, { "epoch": 0.13, "grad_norm": 3.4793166789795213, "learning_rate": 1.944530607859291e-05, "loss": 1.0879, "step": 871 }, { "epoch": 0.13, "grad_norm": 3.9300137550777734, "learning_rate": 1.9443676746043146e-05, "loss": 0.9493, "step": 872 }, { "epoch": 0.13, "grad_norm": 3.730736121130802, "learning_rate": 1.94420450925025e-05, "loss": 1.0333, "step": 873 }, { "epoch": 0.13, "grad_norm": 3.9223348859910074, "learning_rate": 1.944041111837198e-05, "loss": 1.1305, "step": 874 }, { "epoch": 0.13, "grad_norm": 3.418566499173944, "learning_rate": 1.9438774824053174e-05, "loss": 1.0247, "step": 875 }, { "epoch": 0.13, "grad_norm": 5.710512251678389, "learning_rate": 1.9437136209948235e-05, "loss": 1.0405, "step": 876 }, { "epoch": 0.13, "grad_norm": 3.6291136655677816, "learning_rate": 1.9435495276459892e-05, "loss": 1.0759, "step": 877 }, { "epoch": 0.13, "grad_norm": 3.6660336507415625, "learning_rate": 1.9433852023991438e-05, "loss": 1.051, "step": 878 }, { "epoch": 0.13, "grad_norm": 3.7405109288915126, "learning_rate": 1.9432206452946738e-05, "loss": 0.9667, "step": 879 }, { "epoch": 0.13, "grad_norm": 3.795044548099862, "learning_rate": 1.943055856373023e-05, "loss": 0.9896, "step": 880 }, { "epoch": 0.13, "grad_norm": 3.226216202872285, "learning_rate": 1.9428908356746914e-05, "loss": 1.0357, "step": 881 }, { "epoch": 0.14, "grad_norm": 3.6773692597552903, "learning_rate": 1.942725583240237e-05, "loss": 0.9852, "step": 882 }, { "epoch": 0.14, "grad_norm": 3.5055818432672696, "learning_rate": 1.9425600991102737e-05, "loss": 1.0631, "step": 883 }, { "epoch": 0.14, "grad_norm": 3.7887724499734183, "learning_rate": 1.9423943833254736e-05, "loss": 1.0482, "step": 884 }, { "epoch": 0.14, "grad_norm": 3.497326903888074, "learning_rate": 1.9422284359265638e-05, "loss": 0.9832, "step": 885 }, { "epoch": 0.14, "grad_norm": 3.5198908403193294, "learning_rate": 1.942062256954331e-05, "loss": 0.9818, "step": 886 }, { "epoch": 0.14, "grad_norm": 3.6349791779928973, "learning_rate": 1.941895846449616e-05, "loss": 0.9803, "step": 887 }, { "epoch": 0.14, "grad_norm": 4.139055135898923, "learning_rate": 1.9417292044533187e-05, "loss": 1.1132, "step": 888 }, { "epoch": 0.14, "grad_norm": 3.3110527123846234, "learning_rate": 1.9415623310063946e-05, "loss": 0.99, "step": 889 }, { "epoch": 0.14, "grad_norm": 3.4959525311625033, "learning_rate": 1.941395226149857e-05, "loss": 1.0202, "step": 890 }, { "epoch": 0.14, "grad_norm": 3.1293930744554226, "learning_rate": 1.9412278899247748e-05, "loss": 0.9667, "step": 891 }, { "epoch": 0.14, "grad_norm": 3.2870879037590712, "learning_rate": 1.941060322372275e-05, "loss": 1.0469, "step": 892 }, { "epoch": 0.14, "grad_norm": 3.432365023023609, "learning_rate": 1.940892523533541e-05, "loss": 0.9081, "step": 893 }, { "epoch": 0.14, "grad_norm": 3.349988942640587, "learning_rate": 1.940724493449813e-05, "loss": 0.9457, "step": 894 }, { "epoch": 0.14, "grad_norm": 3.060076170295506, "learning_rate": 1.9405562321623884e-05, "loss": 1.0294, "step": 895 }, { "epoch": 0.14, "grad_norm": 3.5164199715881743, "learning_rate": 1.940387739712621e-05, "loss": 0.9969, "step": 896 }, { "epoch": 0.14, "grad_norm": 3.4851977251380837, "learning_rate": 1.940219016141921e-05, "loss": 0.9124, "step": 897 }, { "epoch": 0.14, "grad_norm": 3.8061545721513124, "learning_rate": 1.9400500614917563e-05, "loss": 1.0424, "step": 898 }, { "epoch": 0.14, "grad_norm": 3.424299477384805, "learning_rate": 1.939880875803651e-05, "loss": 1.0319, "step": 899 }, { "epoch": 0.14, "grad_norm": 3.744122164546516, "learning_rate": 1.9397114591191866e-05, "loss": 1.0477, "step": 900 }, { "epoch": 0.14, "grad_norm": 3.445323224862705, "learning_rate": 1.9395418114800005e-05, "loss": 1.0916, "step": 901 }, { "epoch": 0.14, "grad_norm": 3.93929967636297, "learning_rate": 1.939371932927788e-05, "loss": 0.9989, "step": 902 }, { "epoch": 0.14, "grad_norm": 7.544785983942014, "learning_rate": 1.9392018235043e-05, "loss": 1.152, "step": 903 }, { "epoch": 0.14, "grad_norm": 3.892545070640911, "learning_rate": 1.9390314832513442e-05, "loss": 1.1214, "step": 904 }, { "epoch": 0.14, "grad_norm": 21.987263122887104, "learning_rate": 1.9388609122107864e-05, "loss": 1.1062, "step": 905 }, { "epoch": 0.14, "grad_norm": 3.6562630231187017, "learning_rate": 1.9386901104245475e-05, "loss": 0.9848, "step": 906 }, { "epoch": 0.14, "grad_norm": 3.518496911988627, "learning_rate": 1.9385190779346058e-05, "loss": 1.0192, "step": 907 }, { "epoch": 0.14, "grad_norm": 3.5625079045270605, "learning_rate": 1.938347814782997e-05, "loss": 1.0707, "step": 908 }, { "epoch": 0.14, "grad_norm": 3.528834299569891, "learning_rate": 1.938176321011812e-05, "loss": 0.9774, "step": 909 }, { "epoch": 0.14, "grad_norm": 3.6018264100036257, "learning_rate": 1.9380045966631994e-05, "loss": 1.1302, "step": 910 }, { "epoch": 0.14, "grad_norm": 3.312438165651826, "learning_rate": 1.9378326417793646e-05, "loss": 1.0168, "step": 911 }, { "epoch": 0.14, "grad_norm": 3.2024102793678733, "learning_rate": 1.9376604564025685e-05, "loss": 1.0516, "step": 912 }, { "epoch": 0.14, "grad_norm": 3.5675605420399723, "learning_rate": 1.93748804057513e-05, "loss": 1.1061, "step": 913 }, { "epoch": 0.14, "grad_norm": 3.826640315336116, "learning_rate": 1.9373153943394242e-05, "loss": 1.0564, "step": 914 }, { "epoch": 0.14, "grad_norm": 3.624789270347231, "learning_rate": 1.9371425177378824e-05, "loss": 1.0781, "step": 915 }, { "epoch": 0.14, "grad_norm": 3.5070096506335084, "learning_rate": 1.9369694108129928e-05, "loss": 1.0496, "step": 916 }, { "epoch": 0.14, "grad_norm": 3.6300723896660076, "learning_rate": 1.9367960736073002e-05, "loss": 1.0479, "step": 917 }, { "epoch": 0.14, "grad_norm": 3.578842278567324, "learning_rate": 1.9366225061634064e-05, "loss": 0.9765, "step": 918 }, { "epoch": 0.14, "grad_norm": 3.581842751711827, "learning_rate": 1.936448708523969e-05, "loss": 1.1024, "step": 919 }, { "epoch": 0.14, "grad_norm": 3.1951935078674176, "learning_rate": 1.936274680731703e-05, "loss": 0.9714, "step": 920 }, { "epoch": 0.14, "grad_norm": 3.331940461952663, "learning_rate": 1.9361004228293788e-05, "loss": 0.94, "step": 921 }, { "epoch": 0.14, "grad_norm": 3.8236395933927243, "learning_rate": 1.935925934859825e-05, "loss": 1.0488, "step": 922 }, { "epoch": 0.14, "grad_norm": 3.363276927878964, "learning_rate": 1.935751216865925e-05, "loss": 1.1099, "step": 923 }, { "epoch": 0.14, "grad_norm": 3.596516227911937, "learning_rate": 1.9355762688906202e-05, "loss": 1.116, "step": 924 }, { "epoch": 0.14, "grad_norm": 3.348121383472041, "learning_rate": 1.935401090976908e-05, "loss": 1.0218, "step": 925 }, { "epoch": 0.14, "grad_norm": 3.733343930017785, "learning_rate": 1.935225683167842e-05, "loss": 1.0043, "step": 926 }, { "epoch": 0.14, "grad_norm": 3.758966389591936, "learning_rate": 1.935050045506532e-05, "loss": 0.9766, "step": 927 }, { "epoch": 0.14, "grad_norm": 3.580226561244626, "learning_rate": 1.9348741780361455e-05, "loss": 1.0552, "step": 928 }, { "epoch": 0.14, "grad_norm": 4.206751124430905, "learning_rate": 1.9346980807999055e-05, "loss": 1.0489, "step": 929 }, { "epoch": 0.14, "grad_norm": 30.23771096559672, "learning_rate": 1.934521753841092e-05, "loss": 1.2118, "step": 930 }, { "epoch": 0.14, "grad_norm": 3.322973754926158, "learning_rate": 1.9343451972030407e-05, "loss": 1.0232, "step": 931 }, { "epoch": 0.14, "grad_norm": 3.6437034126980783, "learning_rate": 1.934168410929145e-05, "loss": 1.02, "step": 932 }, { "epoch": 0.14, "grad_norm": 3.9613253906670383, "learning_rate": 1.9339913950628536e-05, "loss": 0.9895, "step": 933 }, { "epoch": 0.14, "grad_norm": 3.7446020566588074, "learning_rate": 1.933814149647672e-05, "loss": 0.999, "step": 934 }, { "epoch": 0.14, "grad_norm": 3.666157074758006, "learning_rate": 1.933636674727162e-05, "loss": 1.0742, "step": 935 }, { "epoch": 0.14, "grad_norm": 3.5990163747656454, "learning_rate": 1.9334589703449424e-05, "loss": 0.9915, "step": 936 }, { "epoch": 0.14, "grad_norm": 3.679207043975646, "learning_rate": 1.9332810365446876e-05, "loss": 1.1157, "step": 937 }, { "epoch": 0.14, "grad_norm": 3.2096157271834107, "learning_rate": 1.9331028733701292e-05, "loss": 0.9847, "step": 938 }, { "epoch": 0.14, "grad_norm": 3.7360702314930085, "learning_rate": 1.932924480865054e-05, "loss": 1.0495, "step": 939 }, { "epoch": 0.14, "grad_norm": 12.163737309143626, "learning_rate": 1.9327458590733062e-05, "loss": 1.1496, "step": 940 }, { "epoch": 0.14, "grad_norm": 3.2904270215911993, "learning_rate": 1.9325670080387863e-05, "loss": 0.9515, "step": 941 }, { "epoch": 0.14, "grad_norm": 3.6522178641809253, "learning_rate": 1.932387927805451e-05, "loss": 1.0595, "step": 942 }, { "epoch": 0.14, "grad_norm": 3.46856880025099, "learning_rate": 1.9322086184173122e-05, "loss": 1.0325, "step": 943 }, { "epoch": 0.14, "grad_norm": 3.287603494668713, "learning_rate": 1.93202907991844e-05, "loss": 0.9141, "step": 944 }, { "epoch": 0.14, "grad_norm": 3.6251112696061867, "learning_rate": 1.9318493123529597e-05, "loss": 0.9155, "step": 945 }, { "epoch": 0.14, "grad_norm": 2.987137986210219, "learning_rate": 1.931669315765053e-05, "loss": 1.0145, "step": 946 }, { "epoch": 0.14, "grad_norm": 3.3424632848737277, "learning_rate": 1.9314890901989583e-05, "loss": 1.0415, "step": 947 }, { "epoch": 0.15, "grad_norm": 3.2353948522445077, "learning_rate": 1.9313086356989698e-05, "loss": 0.9595, "step": 948 }, { "epoch": 0.15, "grad_norm": 3.576702121389513, "learning_rate": 1.931127952309438e-05, "loss": 1.1594, "step": 949 }, { "epoch": 0.15, "grad_norm": 3.3757575820385246, "learning_rate": 1.93094704007477e-05, "loss": 1.0258, "step": 950 }, { "epoch": 0.15, "grad_norm": 3.293818922537095, "learning_rate": 1.9307658990394293e-05, "loss": 1.0243, "step": 951 }, { "epoch": 0.15, "grad_norm": 3.3688391700609315, "learning_rate": 1.9305845292479346e-05, "loss": 1.009, "step": 952 }, { "epoch": 0.15, "grad_norm": 9.80828691267798, "learning_rate": 1.930402930744862e-05, "loss": 1.1966, "step": 953 }, { "epoch": 0.15, "grad_norm": 3.35247059313853, "learning_rate": 1.930221103574843e-05, "loss": 0.9744, "step": 954 }, { "epoch": 0.15, "grad_norm": 3.4461099267973307, "learning_rate": 1.930039047782566e-05, "loss": 1.0224, "step": 955 }, { "epoch": 0.15, "grad_norm": 3.611440236247767, "learning_rate": 1.9298567634127748e-05, "loss": 1.0344, "step": 956 }, { "epoch": 0.15, "grad_norm": 3.713992871427326, "learning_rate": 1.92967425051027e-05, "loss": 1.0251, "step": 957 }, { "epoch": 0.15, "grad_norm": 3.449908187883703, "learning_rate": 1.929491509119908e-05, "loss": 0.9121, "step": 958 }, { "epoch": 0.15, "grad_norm": 4.062615991041237, "learning_rate": 1.9293085392866016e-05, "loss": 0.9828, "step": 959 }, { "epoch": 0.15, "grad_norm": 3.42483551399482, "learning_rate": 1.9291253410553198e-05, "loss": 0.9705, "step": 960 }, { "epoch": 0.15, "grad_norm": 2.945041099579405, "learning_rate": 1.9289419144710874e-05, "loss": 1.0338, "step": 961 }, { "epoch": 0.15, "grad_norm": 19.364824164234715, "learning_rate": 1.928758259578986e-05, "loss": 1.1394, "step": 962 }, { "epoch": 0.15, "grad_norm": 3.7583163155418124, "learning_rate": 1.928574376424152e-05, "loss": 1.0891, "step": 963 }, { "epoch": 0.15, "grad_norm": 3.6452998336249296, "learning_rate": 1.9283902650517792e-05, "loss": 1.0854, "step": 964 }, { "epoch": 0.15, "grad_norm": 3.4468051684722307, "learning_rate": 1.9282059255071166e-05, "loss": 0.9391, "step": 965 }, { "epoch": 0.15, "grad_norm": 3.7334849226378406, "learning_rate": 1.9280213578354704e-05, "loss": 1.0808, "step": 966 }, { "epoch": 0.15, "grad_norm": 3.623689601351252, "learning_rate": 1.927836562082202e-05, "loss": 1.0664, "step": 967 }, { "epoch": 0.15, "grad_norm": 3.1724322437995816, "learning_rate": 1.9276515382927284e-05, "loss": 0.9325, "step": 968 }, { "epoch": 0.15, "grad_norm": 3.5921713171702434, "learning_rate": 1.9274662865125235e-05, "loss": 0.9363, "step": 969 }, { "epoch": 0.15, "grad_norm": 3.2800275079762544, "learning_rate": 1.9272808067871173e-05, "loss": 0.9843, "step": 970 }, { "epoch": 0.15, "grad_norm": 3.5548791781879294, "learning_rate": 1.9270950991620955e-05, "loss": 1.0932, "step": 971 }, { "epoch": 0.15, "grad_norm": 3.4175167933953956, "learning_rate": 1.9269091636830998e-05, "loss": 1.0733, "step": 972 }, { "epoch": 0.15, "grad_norm": 3.3628941721131165, "learning_rate": 1.9267230003958276e-05, "loss": 0.9672, "step": 973 }, { "epoch": 0.15, "grad_norm": 3.6295281626924045, "learning_rate": 1.926536609346033e-05, "loss": 1.0338, "step": 974 }, { "epoch": 0.15, "grad_norm": 3.648118265875055, "learning_rate": 1.926349990579525e-05, "loss": 0.8895, "step": 975 }, { "epoch": 0.15, "grad_norm": 3.1202643710316105, "learning_rate": 1.9261631441421703e-05, "loss": 0.9469, "step": 976 }, { "epoch": 0.15, "grad_norm": 3.649872049484259, "learning_rate": 1.92597607007989e-05, "loss": 1.0917, "step": 977 }, { "epoch": 0.15, "grad_norm": 4.175536441933523, "learning_rate": 1.9257887684386614e-05, "loss": 1.112, "step": 978 }, { "epoch": 0.15, "grad_norm": 3.1752679166085684, "learning_rate": 1.925601239264518e-05, "loss": 1.0251, "step": 979 }, { "epoch": 0.15, "grad_norm": 3.681899019040447, "learning_rate": 1.92541348260355e-05, "loss": 1.039, "step": 980 }, { "epoch": 0.15, "grad_norm": 3.4234714701595217, "learning_rate": 1.925225498501902e-05, "loss": 0.964, "step": 981 }, { "epoch": 0.15, "grad_norm": 3.71223071364033, "learning_rate": 1.9250372870057754e-05, "loss": 1.0858, "step": 982 }, { "epoch": 0.15, "grad_norm": 4.212599435406759, "learning_rate": 1.924848848161427e-05, "loss": 0.948, "step": 983 }, { "epoch": 0.15, "grad_norm": 3.5192934057012577, "learning_rate": 1.9246601820151705e-05, "loss": 1.0132, "step": 984 }, { "epoch": 0.15, "grad_norm": 3.928109835134174, "learning_rate": 1.924471288613374e-05, "loss": 1.0298, "step": 985 }, { "epoch": 0.15, "grad_norm": 3.7549381559022263, "learning_rate": 1.9242821680024625e-05, "loss": 1.0001, "step": 986 }, { "epoch": 0.15, "grad_norm": 4.239151421371474, "learning_rate": 1.9240928202289168e-05, "loss": 1.061, "step": 987 }, { "epoch": 0.15, "grad_norm": 3.54700933413855, "learning_rate": 1.9239032453392733e-05, "loss": 1.0863, "step": 988 }, { "epoch": 0.15, "grad_norm": 3.670780179665936, "learning_rate": 1.9237134433801235e-05, "loss": 1.1231, "step": 989 }, { "epoch": 0.15, "grad_norm": 3.289598309932846, "learning_rate": 1.923523414398116e-05, "loss": 1.0588, "step": 990 }, { "epoch": 0.15, "grad_norm": 3.489414422570557, "learning_rate": 1.9233331584399542e-05, "loss": 0.8921, "step": 991 }, { "epoch": 0.15, "grad_norm": 3.4955835415910568, "learning_rate": 1.923142675552398e-05, "loss": 0.9683, "step": 992 }, { "epoch": 0.15, "grad_norm": 11.967525951762857, "learning_rate": 1.9229519657822627e-05, "loss": 1.1301, "step": 993 }, { "epoch": 0.15, "grad_norm": 3.6364302200755745, "learning_rate": 1.9227610291764194e-05, "loss": 1.0683, "step": 994 }, { "epoch": 0.15, "grad_norm": 3.6486532832454164, "learning_rate": 1.922569865781795e-05, "loss": 1.005, "step": 995 }, { "epoch": 0.15, "grad_norm": 4.081910338426013, "learning_rate": 1.922378475645372e-05, "loss": 1.0145, "step": 996 }, { "epoch": 0.15, "grad_norm": 3.4099413617680425, "learning_rate": 1.9221868588141886e-05, "loss": 0.943, "step": 997 }, { "epoch": 0.15, "grad_norm": 3.570163953758169, "learning_rate": 1.9219950153353393e-05, "loss": 1.159, "step": 998 }, { "epoch": 0.15, "grad_norm": 3.3798061968174875, "learning_rate": 1.9218029452559733e-05, "loss": 1.043, "step": 999 }, { "epoch": 0.15, "grad_norm": 3.4073060967897386, "learning_rate": 1.9216106486232965e-05, "loss": 1.0475, "step": 1000 }, { "epoch": 0.15, "grad_norm": 3.943847825429316, "learning_rate": 1.9214181254845696e-05, "loss": 1.0153, "step": 1001 }, { "epoch": 0.15, "grad_norm": 3.7346551319913384, "learning_rate": 1.92122537588711e-05, "loss": 0.957, "step": 1002 }, { "epoch": 0.15, "grad_norm": 3.4143581157133607, "learning_rate": 1.9210323998782892e-05, "loss": 1.0708, "step": 1003 }, { "epoch": 0.15, "grad_norm": 3.551479921614783, "learning_rate": 1.920839197505536e-05, "loss": 1.0025, "step": 1004 }, { "epoch": 0.15, "grad_norm": 3.542686643829843, "learning_rate": 1.9206457688163338e-05, "loss": 0.9833, "step": 1005 }, { "epoch": 0.15, "grad_norm": 3.5015473236779293, "learning_rate": 1.9204521138582227e-05, "loss": 1.0198, "step": 1006 }, { "epoch": 0.15, "grad_norm": 3.563350601802636, "learning_rate": 1.9202582326787963e-05, "loss": 1.0396, "step": 1007 }, { "epoch": 0.15, "grad_norm": 3.292978792419636, "learning_rate": 1.9200641253257064e-05, "loss": 1.0761, "step": 1008 }, { "epoch": 0.15, "grad_norm": 3.555083711210735, "learning_rate": 1.9198697918466585e-05, "loss": 0.9949, "step": 1009 }, { "epoch": 0.15, "grad_norm": 3.5581102126972795, "learning_rate": 1.9196752322894144e-05, "loss": 0.9737, "step": 1010 }, { "epoch": 0.15, "grad_norm": 3.4863765960327533, "learning_rate": 1.9194804467017916e-05, "loss": 1.0135, "step": 1011 }, { "epoch": 0.15, "grad_norm": 3.583370538658214, "learning_rate": 1.9192854351316627e-05, "loss": 0.952, "step": 1012 }, { "epoch": 0.16, "grad_norm": 3.8863410802663925, "learning_rate": 1.9190901976269564e-05, "loss": 0.9946, "step": 1013 }, { "epoch": 0.16, "grad_norm": 3.169190485865015, "learning_rate": 1.9188947342356562e-05, "loss": 0.9519, "step": 1014 }, { "epoch": 0.16, "grad_norm": 3.697746603380063, "learning_rate": 1.9186990450058018e-05, "loss": 1.1704, "step": 1015 }, { "epoch": 0.16, "grad_norm": 3.8443610504164885, "learning_rate": 1.918503129985488e-05, "loss": 0.9317, "step": 1016 }, { "epoch": 0.16, "grad_norm": 9.613406671233491, "learning_rate": 1.9183069892228648e-05, "loss": 1.0153, "step": 1017 }, { "epoch": 0.16, "grad_norm": 3.750460562880933, "learning_rate": 1.9181106227661394e-05, "loss": 1.0646, "step": 1018 }, { "epoch": 0.16, "grad_norm": 3.543565576301301, "learning_rate": 1.9179140306635715e-05, "loss": 1.0755, "step": 1019 }, { "epoch": 0.16, "grad_norm": 3.1179341803007268, "learning_rate": 1.917717212963479e-05, "loss": 1.0341, "step": 1020 }, { "epoch": 0.16, "grad_norm": 3.693231646113712, "learning_rate": 1.917520169714234e-05, "loss": 1.0512, "step": 1021 }, { "epoch": 0.16, "grad_norm": 3.6424860961845242, "learning_rate": 1.917322900964264e-05, "loss": 0.9632, "step": 1022 }, { "epoch": 0.16, "grad_norm": 3.143360794092663, "learning_rate": 1.9171254067620525e-05, "loss": 0.9626, "step": 1023 }, { "epoch": 0.16, "grad_norm": 6.019061378499708, "learning_rate": 1.9169276871561372e-05, "loss": 1.0185, "step": 1024 }, { "epoch": 0.16, "grad_norm": 3.7413802411315658, "learning_rate": 1.9167297421951133e-05, "loss": 0.9387, "step": 1025 }, { "epoch": 0.16, "grad_norm": 3.4166342976005435, "learning_rate": 1.916531571927629e-05, "loss": 0.9611, "step": 1026 }, { "epoch": 0.16, "grad_norm": 3.4654811937421526, "learning_rate": 1.9163331764023893e-05, "loss": 0.9905, "step": 1027 }, { "epoch": 0.16, "grad_norm": 3.3771874199837653, "learning_rate": 1.9161345556681548e-05, "loss": 0.9824, "step": 1028 }, { "epoch": 0.16, "grad_norm": 3.9825758010399266, "learning_rate": 1.91593570977374e-05, "loss": 0.9843, "step": 1029 }, { "epoch": 0.16, "grad_norm": 3.500299091233586, "learning_rate": 1.9157366387680164e-05, "loss": 1.0668, "step": 1030 }, { "epoch": 0.16, "grad_norm": 4.002409073404688, "learning_rate": 1.9155373426999096e-05, "loss": 1.0126, "step": 1031 }, { "epoch": 0.16, "grad_norm": 3.1178497772698615, "learning_rate": 1.9153378216184013e-05, "loss": 0.912, "step": 1032 }, { "epoch": 0.16, "grad_norm": 8.875971732474303, "learning_rate": 1.9151380755725282e-05, "loss": 1.1375, "step": 1033 }, { "epoch": 0.16, "grad_norm": 4.194747601119983, "learning_rate": 1.9149381046113817e-05, "loss": 0.9627, "step": 1034 }, { "epoch": 0.16, "grad_norm": 3.566551938326764, "learning_rate": 1.91473790878411e-05, "loss": 0.9576, "step": 1035 }, { "epoch": 0.16, "grad_norm": 3.7111810907560225, "learning_rate": 1.9145374881399144e-05, "loss": 0.8816, "step": 1036 }, { "epoch": 0.16, "grad_norm": 3.279089276894769, "learning_rate": 1.9143368427280542e-05, "loss": 1.0099, "step": 1037 }, { "epoch": 0.16, "grad_norm": 3.6872390618227047, "learning_rate": 1.914135972597841e-05, "loss": 1.006, "step": 1038 }, { "epoch": 0.16, "grad_norm": 3.4511173229460845, "learning_rate": 1.9139348777986443e-05, "loss": 0.9577, "step": 1039 }, { "epoch": 0.16, "grad_norm": 3.246134239880193, "learning_rate": 1.913733558379886e-05, "loss": 0.9671, "step": 1040 }, { "epoch": 0.16, "grad_norm": 3.16485398398124, "learning_rate": 1.9135320143910465e-05, "loss": 1.0102, "step": 1041 }, { "epoch": 0.16, "grad_norm": 3.498137213691302, "learning_rate": 1.9133302458816586e-05, "loss": 1.1029, "step": 1042 }, { "epoch": 0.16, "grad_norm": 3.9868504509756595, "learning_rate": 1.9131282529013114e-05, "loss": 0.9275, "step": 1043 }, { "epoch": 0.16, "grad_norm": 3.2756053563969894, "learning_rate": 1.9129260354996493e-05, "loss": 1.0949, "step": 1044 }, { "epoch": 0.16, "grad_norm": 3.7173141603029385, "learning_rate": 1.9127235937263716e-05, "loss": 1.0517, "step": 1045 }, { "epoch": 0.16, "grad_norm": 3.085399324277153, "learning_rate": 1.912520927631233e-05, "loss": 1.1233, "step": 1046 }, { "epoch": 0.16, "grad_norm": 4.118322366645487, "learning_rate": 1.9123180372640428e-05, "loss": 1.0455, "step": 1047 }, { "epoch": 0.16, "grad_norm": 3.29119929398604, "learning_rate": 1.912114922674666e-05, "loss": 0.9441, "step": 1048 }, { "epoch": 0.16, "grad_norm": 3.1222949030253933, "learning_rate": 1.9119115839130227e-05, "loss": 0.9119, "step": 1049 }, { "epoch": 0.16, "grad_norm": 3.436195624820573, "learning_rate": 1.9117080210290873e-05, "loss": 1.0198, "step": 1050 }, { "epoch": 0.16, "grad_norm": 3.5871822829406406, "learning_rate": 1.9115042340728904e-05, "loss": 1.0135, "step": 1051 }, { "epoch": 0.16, "grad_norm": 3.3799660086390766, "learning_rate": 1.9113002230945166e-05, "loss": 1.0078, "step": 1052 }, { "epoch": 0.16, "grad_norm": 3.2646985796547536, "learning_rate": 1.9110959881441064e-05, "loss": 0.8787, "step": 1053 }, { "epoch": 0.16, "grad_norm": 3.15046936919999, "learning_rate": 1.910891529271855e-05, "loss": 1.0025, "step": 1054 }, { "epoch": 0.16, "grad_norm": 3.7288170635268973, "learning_rate": 1.9106868465280125e-05, "loss": 0.87, "step": 1055 }, { "epoch": 0.16, "grad_norm": 3.454722575329552, "learning_rate": 1.9104819399628846e-05, "loss": 1.0159, "step": 1056 }, { "epoch": 0.16, "grad_norm": 3.3362831788943454, "learning_rate": 1.9102768096268312e-05, "loss": 1.0137, "step": 1057 }, { "epoch": 0.16, "grad_norm": 3.6138909028367387, "learning_rate": 1.9100714555702673e-05, "loss": 0.8808, "step": 1058 }, { "epoch": 0.16, "grad_norm": 3.289557942859989, "learning_rate": 1.909865877843664e-05, "loss": 0.9233, "step": 1059 }, { "epoch": 0.16, "grad_norm": 3.448109506123443, "learning_rate": 1.9096600764975458e-05, "loss": 1.0014, "step": 1060 }, { "epoch": 0.16, "grad_norm": 2.9931691189641203, "learning_rate": 1.9094540515824933e-05, "loss": 0.9862, "step": 1061 }, { "epoch": 0.16, "grad_norm": 3.22334136087793, "learning_rate": 1.909247803149141e-05, "loss": 1.0753, "step": 1062 }, { "epoch": 0.16, "grad_norm": 3.3609406665630064, "learning_rate": 1.9090413312481803e-05, "loss": 1.0485, "step": 1063 }, { "epoch": 0.16, "grad_norm": 3.3705566669272056, "learning_rate": 1.908834635930355e-05, "loss": 1.0542, "step": 1064 }, { "epoch": 0.16, "grad_norm": 3.5487167698301945, "learning_rate": 1.9086277172464657e-05, "loss": 0.9729, "step": 1065 }, { "epoch": 0.16, "grad_norm": 3.33753925640636, "learning_rate": 1.9084205752473665e-05, "loss": 1.0363, "step": 1066 }, { "epoch": 0.16, "grad_norm": 3.43946877330562, "learning_rate": 1.9082132099839678e-05, "loss": 1.053, "step": 1067 }, { "epoch": 0.16, "grad_norm": 3.5135991771708275, "learning_rate": 1.9080056215072335e-05, "loss": 1.0468, "step": 1068 }, { "epoch": 0.16, "grad_norm": 3.2571006947721535, "learning_rate": 1.9077978098681838e-05, "loss": 1.0529, "step": 1069 }, { "epoch": 0.16, "grad_norm": 3.041552540240493, "learning_rate": 1.9075897751178924e-05, "loss": 0.9847, "step": 1070 }, { "epoch": 0.16, "grad_norm": 3.571357087030962, "learning_rate": 1.9073815173074887e-05, "loss": 0.945, "step": 1071 }, { "epoch": 0.16, "grad_norm": 9.476556745270255, "learning_rate": 1.9071730364881564e-05, "loss": 1.0811, "step": 1072 }, { "epoch": 0.16, "grad_norm": 3.4951504929385124, "learning_rate": 1.9069643327111343e-05, "loss": 0.9277, "step": 1073 }, { "epoch": 0.16, "grad_norm": 3.124336692811566, "learning_rate": 1.9067554060277163e-05, "loss": 0.9597, "step": 1074 }, { "epoch": 0.16, "grad_norm": 3.527427864527887, "learning_rate": 1.90654625648925e-05, "loss": 1.0146, "step": 1075 }, { "epoch": 0.16, "grad_norm": 3.455911351310847, "learning_rate": 1.9063368841471394e-05, "loss": 0.9454, "step": 1076 }, { "epoch": 0.16, "grad_norm": 3.3105290669891643, "learning_rate": 1.9061272890528414e-05, "loss": 0.9481, "step": 1077 }, { "epoch": 0.17, "grad_norm": 3.2685108467907966, "learning_rate": 1.9059174712578692e-05, "loss": 0.9618, "step": 1078 }, { "epoch": 0.17, "grad_norm": 3.9964576477210114, "learning_rate": 1.90570743081379e-05, "loss": 1.0274, "step": 1079 }, { "epoch": 0.17, "grad_norm": 3.6542742179123198, "learning_rate": 1.905497167772226e-05, "loss": 0.9668, "step": 1080 }, { "epoch": 0.17, "grad_norm": 3.5924874165069554, "learning_rate": 1.9052866821848536e-05, "loss": 1.0075, "step": 1081 }, { "epoch": 0.17, "grad_norm": 3.276204761156857, "learning_rate": 1.9050759741034043e-05, "loss": 0.9846, "step": 1082 }, { "epoch": 0.17, "grad_norm": 3.327735856352041, "learning_rate": 1.904865043579664e-05, "loss": 0.9602, "step": 1083 }, { "epoch": 0.17, "grad_norm": 5.197330224276992, "learning_rate": 1.9046538906654745e-05, "loss": 1.0542, "step": 1084 }, { "epoch": 0.17, "grad_norm": 3.855628629594563, "learning_rate": 1.904442515412731e-05, "loss": 0.9439, "step": 1085 }, { "epoch": 0.17, "grad_norm": 11.381720411209631, "learning_rate": 1.9042309178733825e-05, "loss": 1.1813, "step": 1086 }, { "epoch": 0.17, "grad_norm": 4.714974701017107, "learning_rate": 1.904019098099435e-05, "loss": 1.0041, "step": 1087 }, { "epoch": 0.17, "grad_norm": 3.882190357917079, "learning_rate": 1.9038070561429468e-05, "loss": 0.986, "step": 1088 }, { "epoch": 0.17, "grad_norm": 3.4545158928815023, "learning_rate": 1.9035947920560327e-05, "loss": 1.0752, "step": 1089 }, { "epoch": 0.17, "grad_norm": 3.5499121222160874, "learning_rate": 1.903382305890861e-05, "loss": 1.0289, "step": 1090 }, { "epoch": 0.17, "grad_norm": 3.4587592105542635, "learning_rate": 1.903169597699655e-05, "loss": 0.9598, "step": 1091 }, { "epoch": 0.17, "grad_norm": 3.334344279452804, "learning_rate": 1.9029566675346916e-05, "loss": 0.9692, "step": 1092 }, { "epoch": 0.17, "grad_norm": 3.3315491706075857, "learning_rate": 1.9027435154483044e-05, "loss": 1.0866, "step": 1093 }, { "epoch": 0.17, "grad_norm": 3.383403403950702, "learning_rate": 1.9025301414928795e-05, "loss": 1.0768, "step": 1094 }, { "epoch": 0.17, "grad_norm": 3.440092757443575, "learning_rate": 1.9023165457208577e-05, "loss": 0.9649, "step": 1095 }, { "epoch": 0.17, "grad_norm": 3.193803361721852, "learning_rate": 1.9021027281847354e-05, "loss": 1.0055, "step": 1096 }, { "epoch": 0.17, "grad_norm": 3.809350222993406, "learning_rate": 1.9018886889370636e-05, "loss": 0.9537, "step": 1097 }, { "epoch": 0.17, "grad_norm": 3.502375873447698, "learning_rate": 1.9016744280304457e-05, "loss": 0.9742, "step": 1098 }, { "epoch": 0.17, "grad_norm": 3.4995535233150092, "learning_rate": 1.901459945517542e-05, "loss": 0.9055, "step": 1099 }, { "epoch": 0.17, "grad_norm": 3.4377117018648042, "learning_rate": 1.9012452414510667e-05, "loss": 1.0599, "step": 1100 }, { "epoch": 0.17, "grad_norm": 3.895500725772442, "learning_rate": 1.901030315883787e-05, "loss": 1.0256, "step": 1101 }, { "epoch": 0.17, "grad_norm": 3.370139212744162, "learning_rate": 1.9008151688685255e-05, "loss": 0.9934, "step": 1102 }, { "epoch": 0.17, "grad_norm": 3.177175252089327, "learning_rate": 1.9005998004581606e-05, "loss": 1.0087, "step": 1103 }, { "epoch": 0.17, "grad_norm": 3.0968416904480254, "learning_rate": 1.9003842107056224e-05, "loss": 0.9516, "step": 1104 }, { "epoch": 0.17, "grad_norm": 4.683234071334093, "learning_rate": 1.9001683996638978e-05, "loss": 0.9738, "step": 1105 }, { "epoch": 0.17, "grad_norm": 3.7105828983358378, "learning_rate": 1.8999523673860266e-05, "loss": 0.9792, "step": 1106 }, { "epoch": 0.17, "grad_norm": 3.890400305329194, "learning_rate": 1.8997361139251036e-05, "loss": 1.0313, "step": 1107 }, { "epoch": 0.17, "grad_norm": 3.4767511809652216, "learning_rate": 1.8995196393342778e-05, "loss": 1.1165, "step": 1108 }, { "epoch": 0.17, "grad_norm": 3.2470769090829967, "learning_rate": 1.899302943666753e-05, "loss": 1.0464, "step": 1109 }, { "epoch": 0.17, "grad_norm": 3.1368112228941754, "learning_rate": 1.899086026975786e-05, "loss": 0.9509, "step": 1110 }, { "epoch": 0.17, "grad_norm": 3.3698732139606413, "learning_rate": 1.89886888931469e-05, "loss": 1.0738, "step": 1111 }, { "epoch": 0.17, "grad_norm": 3.37952733028334, "learning_rate": 1.89865153073683e-05, "loss": 0.8875, "step": 1112 }, { "epoch": 0.17, "grad_norm": 3.3612014588697523, "learning_rate": 1.898433951295628e-05, "loss": 1.0317, "step": 1113 }, { "epoch": 0.17, "grad_norm": 3.574115698052471, "learning_rate": 1.8982161510445577e-05, "loss": 1.0568, "step": 1114 }, { "epoch": 0.17, "grad_norm": 3.5775869562020173, "learning_rate": 1.8979981300371492e-05, "loss": 1.0596, "step": 1115 }, { "epoch": 0.17, "grad_norm": 3.464328670626159, "learning_rate": 1.8977798883269858e-05, "loss": 0.9411, "step": 1116 }, { "epoch": 0.17, "grad_norm": 3.801939625366067, "learning_rate": 1.897561425967705e-05, "loss": 0.9531, "step": 1117 }, { "epoch": 0.17, "grad_norm": 3.8808600285040273, "learning_rate": 1.8973427430129986e-05, "loss": 1.0827, "step": 1118 }, { "epoch": 0.17, "grad_norm": 3.4145714567838223, "learning_rate": 1.8971238395166128e-05, "loss": 0.9949, "step": 1119 }, { "epoch": 0.17, "grad_norm": 3.097114782795992, "learning_rate": 1.8969047155323484e-05, "loss": 0.9522, "step": 1120 }, { "epoch": 0.17, "grad_norm": 3.3808431452573715, "learning_rate": 1.896685371114059e-05, "loss": 1.0558, "step": 1121 }, { "epoch": 0.17, "grad_norm": 3.213562730595572, "learning_rate": 1.8964658063156542e-05, "loss": 0.9506, "step": 1122 }, { "epoch": 0.17, "grad_norm": 3.23503323959287, "learning_rate": 1.8962460211910968e-05, "loss": 0.9897, "step": 1123 }, { "epoch": 0.17, "grad_norm": 3.6174475409775577, "learning_rate": 1.896026015794403e-05, "loss": 0.9811, "step": 1124 }, { "epoch": 0.17, "grad_norm": 3.3036656051786983, "learning_rate": 1.8958057901796446e-05, "loss": 1.0645, "step": 1125 }, { "epoch": 0.17, "grad_norm": 3.317419068376816, "learning_rate": 1.895585344400947e-05, "loss": 1.0342, "step": 1126 }, { "epoch": 0.17, "grad_norm": 3.558894837151238, "learning_rate": 1.8953646785124895e-05, "loss": 0.9451, "step": 1127 }, { "epoch": 0.17, "grad_norm": 4.262813533733853, "learning_rate": 1.8951437925685053e-05, "loss": 1.0044, "step": 1128 }, { "epoch": 0.17, "grad_norm": 4.615319625164075, "learning_rate": 1.8949226866232822e-05, "loss": 1.0139, "step": 1129 }, { "epoch": 0.17, "grad_norm": 3.7198693704317325, "learning_rate": 1.8947013607311614e-05, "loss": 1.1314, "step": 1130 }, { "epoch": 0.17, "grad_norm": 4.179425008195092, "learning_rate": 1.8944798149465394e-05, "loss": 1.066, "step": 1131 }, { "epoch": 0.17, "grad_norm": 3.3484987417758982, "learning_rate": 1.8942580493238655e-05, "loss": 1.0742, "step": 1132 }, { "epoch": 0.17, "grad_norm": 3.610034457301421, "learning_rate": 1.8940360639176435e-05, "loss": 1.0215, "step": 1133 }, { "epoch": 0.17, "grad_norm": 3.4479854456032344, "learning_rate": 1.893813858782431e-05, "loss": 1.096, "step": 1134 }, { "epoch": 0.17, "grad_norm": 2.989140158166724, "learning_rate": 1.89359143397284e-05, "loss": 0.9756, "step": 1135 }, { "epoch": 0.17, "grad_norm": 3.5927108470611304, "learning_rate": 1.8933687895435363e-05, "loss": 0.9714, "step": 1136 }, { "epoch": 0.17, "grad_norm": 3.4687364701321726, "learning_rate": 1.8931459255492396e-05, "loss": 1.0522, "step": 1137 }, { "epoch": 0.17, "grad_norm": 3.355312740447251, "learning_rate": 1.8929228420447234e-05, "loss": 1.0474, "step": 1138 }, { "epoch": 0.17, "grad_norm": 5.329216562737884, "learning_rate": 1.8926995390848158e-05, "loss": 0.9987, "step": 1139 }, { "epoch": 0.17, "grad_norm": 3.4784093304330126, "learning_rate": 1.892476016724398e-05, "loss": 0.9661, "step": 1140 }, { "epoch": 0.17, "grad_norm": 4.578408602821964, "learning_rate": 1.892252275018406e-05, "loss": 0.9503, "step": 1141 }, { "epoch": 0.17, "grad_norm": 3.291121071892807, "learning_rate": 1.892028314021829e-05, "loss": 1.0053, "step": 1142 }, { "epoch": 0.17, "grad_norm": 3.6034792441405075, "learning_rate": 1.89180413378971e-05, "loss": 1.0765, "step": 1143 }, { "epoch": 0.18, "grad_norm": 3.558401358394951, "learning_rate": 1.8915797343771464e-05, "loss": 1.0834, "step": 1144 }, { "epoch": 0.18, "grad_norm": 3.230448244024029, "learning_rate": 1.89135511583929e-05, "loss": 1.0834, "step": 1145 }, { "epoch": 0.18, "grad_norm": 3.040874118640548, "learning_rate": 1.8911302782313442e-05, "loss": 0.9802, "step": 1146 }, { "epoch": 0.18, "grad_norm": 3.330212581063196, "learning_rate": 1.8909052216085695e-05, "loss": 1.0496, "step": 1147 }, { "epoch": 0.18, "grad_norm": 3.276501255977922, "learning_rate": 1.8906799460262776e-05, "loss": 0.9585, "step": 1148 }, { "epoch": 0.18, "grad_norm": 3.7767677443907792, "learning_rate": 1.8904544515398348e-05, "loss": 0.8529, "step": 1149 }, { "epoch": 0.18, "grad_norm": 3.339419253619894, "learning_rate": 1.8902287382046613e-05, "loss": 0.9477, "step": 1150 }, { "epoch": 0.18, "grad_norm": 14.351120059873713, "learning_rate": 1.8900028060762317e-05, "loss": 1.3391, "step": 1151 }, { "epoch": 0.18, "grad_norm": 3.7045864230149754, "learning_rate": 1.8897766552100735e-05, "loss": 1.0245, "step": 1152 }, { "epoch": 0.18, "grad_norm": 3.5387993543284884, "learning_rate": 1.889550285661768e-05, "loss": 1.1662, "step": 1153 }, { "epoch": 0.18, "grad_norm": 3.363800219601907, "learning_rate": 1.8893236974869503e-05, "loss": 0.9452, "step": 1154 }, { "epoch": 0.18, "grad_norm": 3.775347794310135, "learning_rate": 1.8890968907413103e-05, "loss": 1.0074, "step": 1155 }, { "epoch": 0.18, "grad_norm": 3.444627030455306, "learning_rate": 1.88886986548059e-05, "loss": 1.0151, "step": 1156 }, { "epoch": 0.18, "grad_norm": 3.4675043582061207, "learning_rate": 1.888642621760586e-05, "loss": 0.9661, "step": 1157 }, { "epoch": 0.18, "grad_norm": 3.7803298567162296, "learning_rate": 1.8884151596371487e-05, "loss": 1.0353, "step": 1158 }, { "epoch": 0.18, "grad_norm": 3.3184377487122805, "learning_rate": 1.888187479166182e-05, "loss": 0.9759, "step": 1159 }, { "epoch": 0.18, "grad_norm": 3.816499168073624, "learning_rate": 1.8879595804036424e-05, "loss": 1.0366, "step": 1160 }, { "epoch": 0.18, "grad_norm": 3.2112431734482842, "learning_rate": 1.8877314634055418e-05, "loss": 1.0806, "step": 1161 }, { "epoch": 0.18, "grad_norm": 2.818032573141438, "learning_rate": 1.8875031282279452e-05, "loss": 0.9278, "step": 1162 }, { "epoch": 0.18, "grad_norm": 3.3892480376956096, "learning_rate": 1.88727457492697e-05, "loss": 1.0231, "step": 1163 }, { "epoch": 0.18, "grad_norm": 3.338034412494329, "learning_rate": 1.8870458035587896e-05, "loss": 0.9942, "step": 1164 }, { "epoch": 0.18, "grad_norm": 3.4858890392363016, "learning_rate": 1.886816814179629e-05, "loss": 1.0469, "step": 1165 }, { "epoch": 0.18, "grad_norm": 2.932865879989182, "learning_rate": 1.8865876068457663e-05, "loss": 0.9496, "step": 1166 }, { "epoch": 0.18, "grad_norm": 20.12195654806366, "learning_rate": 1.8863581816135355e-05, "loss": 1.2516, "step": 1167 }, { "epoch": 0.18, "grad_norm": 3.3345948724808117, "learning_rate": 1.8861285385393226e-05, "loss": 1.0154, "step": 1168 }, { "epoch": 0.18, "grad_norm": 3.5548649180893577, "learning_rate": 1.8858986776795673e-05, "loss": 0.945, "step": 1169 }, { "epoch": 0.18, "grad_norm": 3.05695734572861, "learning_rate": 1.8856685990907627e-05, "loss": 0.9177, "step": 1170 }, { "epoch": 0.18, "grad_norm": 3.3215030028624435, "learning_rate": 1.8854383028294563e-05, "loss": 0.914, "step": 1171 }, { "epoch": 0.18, "grad_norm": 3.534699822951794, "learning_rate": 1.885207788952248e-05, "loss": 0.9461, "step": 1172 }, { "epoch": 0.18, "grad_norm": 8.630422675866747, "learning_rate": 1.884977057515792e-05, "loss": 1.0953, "step": 1173 }, { "epoch": 0.18, "grad_norm": 3.3762286736972444, "learning_rate": 1.884746108576795e-05, "loss": 1.0774, "step": 1174 }, { "epoch": 0.18, "grad_norm": 3.2132032320980506, "learning_rate": 1.8845149421920183e-05, "loss": 1.0781, "step": 1175 }, { "epoch": 0.18, "grad_norm": 3.140304792219557, "learning_rate": 1.884283558418276e-05, "loss": 0.9953, "step": 1176 }, { "epoch": 0.18, "grad_norm": 3.586259201431184, "learning_rate": 1.8840519573124355e-05, "loss": 0.9602, "step": 1177 }, { "epoch": 0.18, "grad_norm": 3.0692544921950002, "learning_rate": 1.8838201389314186e-05, "loss": 0.9531, "step": 1178 }, { "epoch": 0.18, "grad_norm": 4.059529893306157, "learning_rate": 1.8835881033321987e-05, "loss": 0.9284, "step": 1179 }, { "epoch": 0.18, "grad_norm": 3.8748227160913618, "learning_rate": 1.883355850571804e-05, "loss": 0.9953, "step": 1180 }, { "epoch": 0.18, "grad_norm": 3.547535182182359, "learning_rate": 1.8831233807073162e-05, "loss": 1.0275, "step": 1181 }, { "epoch": 0.18, "grad_norm": 3.332665967214049, "learning_rate": 1.8828906937958697e-05, "loss": 1.0284, "step": 1182 }, { "epoch": 0.18, "grad_norm": 3.997463814001846, "learning_rate": 1.882657789894652e-05, "loss": 1.0457, "step": 1183 }, { "epoch": 0.18, "grad_norm": 3.167722059482026, "learning_rate": 1.882424669060905e-05, "loss": 0.9619, "step": 1184 }, { "epoch": 0.18, "grad_norm": 3.3420295940411417, "learning_rate": 1.8821913313519222e-05, "loss": 1.0286, "step": 1185 }, { "epoch": 0.18, "grad_norm": 3.4323628920909, "learning_rate": 1.8819577768250527e-05, "loss": 0.9414, "step": 1186 }, { "epoch": 0.18, "grad_norm": 3.471810980202804, "learning_rate": 1.8817240055376967e-05, "loss": 1.0593, "step": 1187 }, { "epoch": 0.18, "grad_norm": 4.014072585779561, "learning_rate": 1.8814900175473095e-05, "loss": 1.0279, "step": 1188 }, { "epoch": 0.18, "grad_norm": 3.5142431015633555, "learning_rate": 1.881255812911398e-05, "loss": 0.9126, "step": 1189 }, { "epoch": 0.18, "grad_norm": 3.1249815251555946, "learning_rate": 1.8810213916875232e-05, "loss": 0.9398, "step": 1190 }, { "epoch": 0.18, "grad_norm": 3.095309690894207, "learning_rate": 1.8807867539333e-05, "loss": 1.0398, "step": 1191 }, { "epoch": 0.18, "grad_norm": 3.2039191939862124, "learning_rate": 1.8805518997063947e-05, "loss": 1.0105, "step": 1192 }, { "epoch": 0.18, "grad_norm": 3.3474256293689044, "learning_rate": 1.8803168290645287e-05, "loss": 0.9162, "step": 1193 }, { "epoch": 0.18, "grad_norm": 3.0466196271546186, "learning_rate": 1.8800815420654758e-05, "loss": 0.9939, "step": 1194 }, { "epoch": 0.18, "grad_norm": 3.346909179921009, "learning_rate": 1.8798460387670622e-05, "loss": 0.9594, "step": 1195 }, { "epoch": 0.18, "grad_norm": 3.486888312468145, "learning_rate": 1.8796103192271688e-05, "loss": 1.064, "step": 1196 }, { "epoch": 0.18, "grad_norm": 3.5432823849117385, "learning_rate": 1.8793743835037287e-05, "loss": 1.0371, "step": 1197 }, { "epoch": 0.18, "grad_norm": 3.7968288431929107, "learning_rate": 1.8791382316547276e-05, "loss": 1.0197, "step": 1198 }, { "epoch": 0.18, "grad_norm": 3.7397975872652305, "learning_rate": 1.878901863738206e-05, "loss": 1.0208, "step": 1199 }, { "epoch": 0.18, "grad_norm": 3.039320733733214, "learning_rate": 1.8786652798122557e-05, "loss": 1.0011, "step": 1200 }, { "epoch": 0.18, "grad_norm": 3.443305156795146, "learning_rate": 1.878428479935023e-05, "loss": 1.0169, "step": 1201 }, { "epoch": 0.18, "grad_norm": 3.4890962089355315, "learning_rate": 1.878191464164706e-05, "loss": 1.0539, "step": 1202 }, { "epoch": 0.18, "grad_norm": 3.3572622978384468, "learning_rate": 1.8779542325595572e-05, "loss": 1.0377, "step": 1203 }, { "epoch": 0.18, "grad_norm": 3.4645394707459998, "learning_rate": 1.877716785177881e-05, "loss": 1.0387, "step": 1204 }, { "epoch": 0.18, "grad_norm": 3.4755656750309, "learning_rate": 1.8774791220780358e-05, "loss": 1.0699, "step": 1205 }, { "epoch": 0.18, "grad_norm": 3.2676798552234088, "learning_rate": 1.877241243318432e-05, "loss": 0.9958, "step": 1206 }, { "epoch": 0.18, "grad_norm": 3.379353075378151, "learning_rate": 1.8770031489575336e-05, "loss": 0.9279, "step": 1207 }, { "epoch": 0.18, "grad_norm": 4.009969046904054, "learning_rate": 1.876764839053858e-05, "loss": 0.9824, "step": 1208 }, { "epoch": 0.19, "grad_norm": 3.195814747484976, "learning_rate": 1.8765263136659747e-05, "loss": 1.0223, "step": 1209 }, { "epoch": 0.19, "grad_norm": 2.98702869224199, "learning_rate": 1.8762875728525063e-05, "loss": 0.9445, "step": 1210 }, { "epoch": 0.19, "grad_norm": 3.5172367808354177, "learning_rate": 1.876048616672129e-05, "loss": 0.8926, "step": 1211 }, { "epoch": 0.19, "grad_norm": 3.4871172798691954, "learning_rate": 1.875809445183572e-05, "loss": 1.0432, "step": 1212 }, { "epoch": 0.19, "grad_norm": 3.5064858806387686, "learning_rate": 1.8755700584456156e-05, "loss": 1.0091, "step": 1213 }, { "epoch": 0.19, "grad_norm": 3.1161036668435775, "learning_rate": 1.8753304565170953e-05, "loss": 0.987, "step": 1214 }, { "epoch": 0.19, "grad_norm": 3.4399247964110287, "learning_rate": 1.8750906394568984e-05, "loss": 1.132, "step": 1215 }, { "epoch": 0.19, "grad_norm": 5.439032864890778, "learning_rate": 1.8748506073239654e-05, "loss": 0.8858, "step": 1216 }, { "epoch": 0.19, "grad_norm": 3.9288136576207817, "learning_rate": 1.874610360177289e-05, "loss": 0.9878, "step": 1217 }, { "epoch": 0.19, "grad_norm": 5.412264557712734, "learning_rate": 1.8743698980759154e-05, "loss": 1.0101, "step": 1218 }, { "epoch": 0.19, "grad_norm": 3.0818804914527385, "learning_rate": 1.8741292210789435e-05, "loss": 0.9747, "step": 1219 }, { "epoch": 0.19, "grad_norm": 3.3379575642167576, "learning_rate": 1.873888329245525e-05, "loss": 1.0409, "step": 1220 }, { "epoch": 0.19, "grad_norm": 3.108719942681841, "learning_rate": 1.8736472226348643e-05, "loss": 0.8671, "step": 1221 }, { "epoch": 0.19, "grad_norm": 3.433163389985319, "learning_rate": 1.873405901306219e-05, "loss": 1.0531, "step": 1222 }, { "epoch": 0.19, "grad_norm": 3.367416135930543, "learning_rate": 1.873164365318898e-05, "loss": 1.115, "step": 1223 }, { "epoch": 0.19, "grad_norm": 3.1944459496476703, "learning_rate": 1.8729226147322653e-05, "loss": 1.0592, "step": 1224 }, { "epoch": 0.19, "grad_norm": 3.3534934567247827, "learning_rate": 1.8726806496057356e-05, "loss": 0.9829, "step": 1225 }, { "epoch": 0.19, "grad_norm": 3.4050693643260876, "learning_rate": 1.8724384699987776e-05, "loss": 1.0231, "step": 1226 }, { "epoch": 0.19, "grad_norm": 3.2027668780710354, "learning_rate": 1.872196075970912e-05, "loss": 0.9412, "step": 1227 }, { "epoch": 0.19, "grad_norm": 3.225637604293947, "learning_rate": 1.871953467581713e-05, "loss": 1.0382, "step": 1228 }, { "epoch": 0.19, "grad_norm": 3.0737196348462033, "learning_rate": 1.8717106448908065e-05, "loss": 1.0373, "step": 1229 }, { "epoch": 0.19, "grad_norm": 3.091074286664361, "learning_rate": 1.871467607957871e-05, "loss": 1.0822, "step": 1230 }, { "epoch": 0.19, "grad_norm": 3.1267167205958373, "learning_rate": 1.871224356842639e-05, "loss": 0.9143, "step": 1231 }, { "epoch": 0.19, "grad_norm": 3.697620074634605, "learning_rate": 1.8709808916048948e-05, "loss": 0.9823, "step": 1232 }, { "epoch": 0.19, "grad_norm": 3.5889398544631512, "learning_rate": 1.8707372123044746e-05, "loss": 0.9262, "step": 1233 }, { "epoch": 0.19, "grad_norm": 3.5183700822735426, "learning_rate": 1.8704933190012683e-05, "loss": 1.0104, "step": 1234 }, { "epoch": 0.19, "grad_norm": 3.0718014289597306, "learning_rate": 1.8702492117552185e-05, "loss": 0.9683, "step": 1235 }, { "epoch": 0.19, "grad_norm": 3.6196911699554666, "learning_rate": 1.870004890626319e-05, "loss": 0.8832, "step": 1236 }, { "epoch": 0.19, "grad_norm": 3.072246372090567, "learning_rate": 1.869760355674618e-05, "loss": 0.8994, "step": 1237 }, { "epoch": 0.19, "grad_norm": 3.1411597515237917, "learning_rate": 1.869515606960215e-05, "loss": 0.9609, "step": 1238 }, { "epoch": 0.19, "grad_norm": 3.5541454860928097, "learning_rate": 1.869270644543262e-05, "loss": 1.1036, "step": 1239 }, { "epoch": 0.19, "grad_norm": 3.4779650247855365, "learning_rate": 1.869025468483964e-05, "loss": 1.0456, "step": 1240 }, { "epoch": 0.19, "grad_norm": 3.2139546208099206, "learning_rate": 1.868780078842579e-05, "loss": 1.021, "step": 1241 }, { "epoch": 0.19, "grad_norm": 3.225726212496146, "learning_rate": 1.8685344756794163e-05, "loss": 0.9912, "step": 1242 }, { "epoch": 0.19, "grad_norm": 3.186249770125133, "learning_rate": 1.8682886590548385e-05, "loss": 0.9572, "step": 1243 }, { "epoch": 0.19, "grad_norm": 3.422670951947607, "learning_rate": 1.8680426290292603e-05, "loss": 0.9215, "step": 1244 }, { "epoch": 0.19, "grad_norm": 3.374702483528041, "learning_rate": 1.867796385663149e-05, "loss": 1.0541, "step": 1245 }, { "epoch": 0.19, "grad_norm": 3.7650617765604655, "learning_rate": 1.8675499290170243e-05, "loss": 0.964, "step": 1246 }, { "epoch": 0.19, "grad_norm": 3.366569229191678, "learning_rate": 1.8673032591514586e-05, "loss": 0.9945, "step": 1247 }, { "epoch": 0.19, "grad_norm": 3.3031430086247657, "learning_rate": 1.8670563761270762e-05, "loss": 1.0205, "step": 1248 }, { "epoch": 0.19, "grad_norm": 3.004205593212754, "learning_rate": 1.866809280004554e-05, "loss": 0.8999, "step": 1249 }, { "epoch": 0.19, "grad_norm": 3.9436629690025033, "learning_rate": 1.8665619708446216e-05, "loss": 0.9898, "step": 1250 }, { "epoch": 0.19, "grad_norm": 3.263914489945494, "learning_rate": 1.86631444870806e-05, "loss": 0.9416, "step": 1251 }, { "epoch": 0.19, "grad_norm": 3.388649773428647, "learning_rate": 1.8660667136557038e-05, "loss": 1.0842, "step": 1252 }, { "epoch": 0.19, "grad_norm": 3.284605163224415, "learning_rate": 1.865818765748439e-05, "loss": 0.9032, "step": 1253 }, { "epoch": 0.19, "grad_norm": 3.36238307880044, "learning_rate": 1.8655706050472045e-05, "loss": 0.9571, "step": 1254 }, { "epoch": 0.19, "grad_norm": 3.3955441759934106, "learning_rate": 1.8653222316129914e-05, "loss": 0.9705, "step": 1255 }, { "epoch": 0.19, "grad_norm": 3.4215491117960726, "learning_rate": 1.865073645506842e-05, "loss": 0.96, "step": 1256 }, { "epoch": 0.19, "grad_norm": 3.0799731990389914, "learning_rate": 1.864824846789853e-05, "loss": 0.9825, "step": 1257 }, { "epoch": 0.19, "grad_norm": 3.16621852615012, "learning_rate": 1.8645758355231712e-05, "loss": 0.9223, "step": 1258 }, { "epoch": 0.19, "grad_norm": 2.9485309307936913, "learning_rate": 1.8643266117679973e-05, "loss": 0.9407, "step": 1259 }, { "epoch": 0.19, "grad_norm": 3.0035964787739, "learning_rate": 1.864077175585583e-05, "loss": 1.0411, "step": 1260 }, { "epoch": 0.19, "grad_norm": 3.4098162622882207, "learning_rate": 1.863827527037233e-05, "loss": 0.9541, "step": 1261 }, { "epoch": 0.19, "grad_norm": 3.11021479917164, "learning_rate": 1.8635776661843037e-05, "loss": 0.9688, "step": 1262 }, { "epoch": 0.19, "grad_norm": 11.319618014901573, "learning_rate": 1.863327593088204e-05, "loss": 1.1829, "step": 1263 }, { "epoch": 0.19, "grad_norm": 3.3061764646598077, "learning_rate": 1.8630773078103947e-05, "loss": 0.8817, "step": 1264 }, { "epoch": 0.19, "grad_norm": 3.189631081214496, "learning_rate": 1.8628268104123895e-05, "loss": 0.9856, "step": 1265 }, { "epoch": 0.19, "grad_norm": 6.2252370788231, "learning_rate": 1.8625761009557527e-05, "loss": 1.0615, "step": 1266 }, { "epoch": 0.19, "grad_norm": 4.112050914940334, "learning_rate": 1.8623251795021026e-05, "loss": 1.0279, "step": 1267 }, { "epoch": 0.19, "grad_norm": 3.472255495162265, "learning_rate": 1.8620740461131078e-05, "loss": 1.02, "step": 1268 }, { "epoch": 0.19, "grad_norm": 3.3616466067796567, "learning_rate": 1.8618227008504903e-05, "loss": 1.0231, "step": 1269 }, { "epoch": 0.19, "grad_norm": 3.135356926756423, "learning_rate": 1.861571143776024e-05, "loss": 0.9841, "step": 1270 }, { "epoch": 0.19, "grad_norm": 3.1447769298644737, "learning_rate": 1.8613193749515336e-05, "loss": 0.9787, "step": 1271 }, { "epoch": 0.19, "grad_norm": 2.9614186561247378, "learning_rate": 1.861067394438898e-05, "loss": 1.0221, "step": 1272 }, { "epoch": 0.19, "grad_norm": 3.629189000456825, "learning_rate": 1.860815202300046e-05, "loss": 1.0493, "step": 1273 }, { "epoch": 0.2, "grad_norm": 3.5198156291852416, "learning_rate": 1.86056279859696e-05, "loss": 1.1185, "step": 1274 }, { "epoch": 0.2, "grad_norm": 3.228813921955044, "learning_rate": 1.860310183391673e-05, "loss": 1.0301, "step": 1275 }, { "epoch": 0.2, "grad_norm": 3.294294729255725, "learning_rate": 1.8600573567462722e-05, "loss": 1.0082, "step": 1276 }, { "epoch": 0.2, "grad_norm": 9.191062319084121, "learning_rate": 1.8598043187228937e-05, "loss": 1.1555, "step": 1277 }, { "epoch": 0.2, "grad_norm": 3.2943202829958955, "learning_rate": 1.8595510693837277e-05, "loss": 0.9557, "step": 1278 }, { "epoch": 0.2, "grad_norm": 3.3228994938865206, "learning_rate": 1.8592976087910162e-05, "loss": 1.0104, "step": 1279 }, { "epoch": 0.2, "grad_norm": 3.4589290611274968, "learning_rate": 1.8590439370070518e-05, "loss": 1.0474, "step": 1280 }, { "epoch": 0.2, "grad_norm": 3.089947078240719, "learning_rate": 1.858790054094181e-05, "loss": 0.8952, "step": 1281 }, { "epoch": 0.2, "grad_norm": 3.223869917514333, "learning_rate": 1.8585359601148005e-05, "loss": 0.9582, "step": 1282 }, { "epoch": 0.2, "grad_norm": 3.5241596955063565, "learning_rate": 1.8582816551313594e-05, "loss": 1.0233, "step": 1283 }, { "epoch": 0.2, "grad_norm": 3.1393632796116306, "learning_rate": 1.858027139206359e-05, "loss": 0.9223, "step": 1284 }, { "epoch": 0.2, "grad_norm": 3.0147943589181088, "learning_rate": 1.857772412402352e-05, "loss": 0.9559, "step": 1285 }, { "epoch": 0.2, "grad_norm": 3.76710874031879, "learning_rate": 1.857517474781943e-05, "loss": 1.0139, "step": 1286 }, { "epoch": 0.2, "grad_norm": 3.012356243143458, "learning_rate": 1.857262326407789e-05, "loss": 0.9859, "step": 1287 }, { "epoch": 0.2, "grad_norm": 3.214110568695533, "learning_rate": 1.8570069673425978e-05, "loss": 1.0213, "step": 1288 }, { "epoch": 0.2, "grad_norm": 3.4750350660104727, "learning_rate": 1.8567513976491298e-05, "loss": 0.988, "step": 1289 }, { "epoch": 0.2, "grad_norm": 10.760410578608612, "learning_rate": 1.856495617390197e-05, "loss": 1.2646, "step": 1290 }, { "epoch": 0.2, "grad_norm": 3.1666923521830657, "learning_rate": 1.856239626628662e-05, "loss": 0.9592, "step": 1291 }, { "epoch": 0.2, "grad_norm": 4.017141775920228, "learning_rate": 1.8559834254274413e-05, "loss": 1.0568, "step": 1292 }, { "epoch": 0.2, "grad_norm": 3.2580613936555403, "learning_rate": 1.8557270138495017e-05, "loss": 1.0296, "step": 1293 }, { "epoch": 0.2, "grad_norm": 6.722825184968627, "learning_rate": 1.8554703919578617e-05, "loss": 1.0888, "step": 1294 }, { "epoch": 0.2, "grad_norm": 3.8520569991398985, "learning_rate": 1.855213559815592e-05, "loss": 1.0294, "step": 1295 }, { "epoch": 0.2, "grad_norm": 3.4248927635388777, "learning_rate": 1.8549565174858148e-05, "loss": 1.0494, "step": 1296 }, { "epoch": 0.2, "grad_norm": 3.382377772814438, "learning_rate": 1.8546992650317035e-05, "loss": 0.9951, "step": 1297 }, { "epoch": 0.2, "grad_norm": 3.3623590404197627, "learning_rate": 1.854441802516484e-05, "loss": 1.0029, "step": 1298 }, { "epoch": 0.2, "grad_norm": 12.938670771717353, "learning_rate": 1.854184130003433e-05, "loss": 1.1985, "step": 1299 }, { "epoch": 0.2, "grad_norm": 3.177523092609845, "learning_rate": 1.8539262475558794e-05, "loss": 1.0393, "step": 1300 }, { "epoch": 0.2, "grad_norm": 3.142874464959742, "learning_rate": 1.8536681552372035e-05, "loss": 1.0258, "step": 1301 }, { "epoch": 0.2, "grad_norm": 3.1804710183341443, "learning_rate": 1.8534098531108374e-05, "loss": 0.8893, "step": 1302 }, { "epoch": 0.2, "grad_norm": 7.385721670884511, "learning_rate": 1.8531513412402635e-05, "loss": 1.1044, "step": 1303 }, { "epoch": 0.2, "grad_norm": 3.1355109726054384, "learning_rate": 1.852892619689018e-05, "loss": 1.0309, "step": 1304 }, { "epoch": 0.2, "grad_norm": 3.2700105399947113, "learning_rate": 1.8526336885206863e-05, "loss": 1.0062, "step": 1305 }, { "epoch": 0.2, "grad_norm": 3.83603218845176, "learning_rate": 1.8523745477989074e-05, "loss": 0.9573, "step": 1306 }, { "epoch": 0.2, "grad_norm": 3.274591564653697, "learning_rate": 1.85211519758737e-05, "loss": 0.9857, "step": 1307 }, { "epoch": 0.2, "grad_norm": 3.738378402447023, "learning_rate": 1.851855637949816e-05, "loss": 0.9904, "step": 1308 }, { "epoch": 0.2, "grad_norm": 3.4655143401997606, "learning_rate": 1.851595868950037e-05, "loss": 1.032, "step": 1309 }, { "epoch": 0.2, "grad_norm": 3.267631524880856, "learning_rate": 1.8513358906518773e-05, "loss": 0.9451, "step": 1310 }, { "epoch": 0.2, "grad_norm": 3.1241374456914595, "learning_rate": 1.8510757031192325e-05, "loss": 0.9012, "step": 1311 }, { "epoch": 0.2, "grad_norm": 3.1106665906055904, "learning_rate": 1.850815306416049e-05, "loss": 0.9004, "step": 1312 }, { "epoch": 0.2, "grad_norm": 4.57024670362329, "learning_rate": 1.8505547006063254e-05, "loss": 1.0498, "step": 1313 }, { "epoch": 0.2, "grad_norm": 3.1600728276232752, "learning_rate": 1.8502938857541112e-05, "loss": 1.0602, "step": 1314 }, { "epoch": 0.2, "grad_norm": 3.301046353011309, "learning_rate": 1.850032861923507e-05, "loss": 1.1654, "step": 1315 }, { "epoch": 0.2, "grad_norm": 3.238340991080974, "learning_rate": 1.8497716291786653e-05, "loss": 0.9199, "step": 1316 }, { "epoch": 0.2, "grad_norm": 2.824416857467904, "learning_rate": 1.8495101875837903e-05, "loss": 1.0123, "step": 1317 }, { "epoch": 0.2, "grad_norm": 2.962611707799049, "learning_rate": 1.8492485372031363e-05, "loss": 0.9427, "step": 1318 }, { "epoch": 0.2, "grad_norm": 3.2661569512729365, "learning_rate": 1.84898667810101e-05, "loss": 1.0405, "step": 1319 }, { "epoch": 0.2, "grad_norm": 3.0660251603874067, "learning_rate": 1.8487246103417692e-05, "loss": 0.8864, "step": 1320 }, { "epoch": 0.2, "grad_norm": 3.321859969901298, "learning_rate": 1.848462333989822e-05, "loss": 1.0434, "step": 1321 }, { "epoch": 0.2, "grad_norm": 3.5085205805452113, "learning_rate": 1.8481998491096294e-05, "loss": 0.992, "step": 1322 }, { "epoch": 0.2, "grad_norm": 3.729147238125545, "learning_rate": 1.8479371557657028e-05, "loss": 1.0681, "step": 1323 }, { "epoch": 0.2, "grad_norm": 3.314751970939591, "learning_rate": 1.847674254022604e-05, "loss": 0.9572, "step": 1324 }, { "epoch": 0.2, "grad_norm": 3.0172757269056842, "learning_rate": 1.847411143944948e-05, "loss": 0.9826, "step": 1325 }, { "epoch": 0.2, "grad_norm": 3.285621040400702, "learning_rate": 1.847147825597399e-05, "loss": 1.1254, "step": 1326 }, { "epoch": 0.2, "grad_norm": 3.448844738758746, "learning_rate": 1.8468842990446738e-05, "loss": 1.052, "step": 1327 }, { "epoch": 0.2, "grad_norm": 3.495073379772717, "learning_rate": 1.84662056435154e-05, "loss": 1.0485, "step": 1328 }, { "epoch": 0.2, "grad_norm": 12.588068668936893, "learning_rate": 1.8463566215828153e-05, "loss": 1.1313, "step": 1329 }, { "epoch": 0.2, "grad_norm": 3.5046696564398703, "learning_rate": 1.8460924708033703e-05, "loss": 1.0228, "step": 1330 }, { "epoch": 0.2, "grad_norm": 3.311822770954448, "learning_rate": 1.845828112078125e-05, "loss": 0.9789, "step": 1331 }, { "epoch": 0.2, "grad_norm": 3.6065329287172045, "learning_rate": 1.8455635454720523e-05, "loss": 0.952, "step": 1332 }, { "epoch": 0.2, "grad_norm": 3.337957980232742, "learning_rate": 1.845298771050175e-05, "loss": 1.039, "step": 1333 }, { "epoch": 0.2, "grad_norm": 3.1850080920372354, "learning_rate": 1.845033788877567e-05, "loss": 0.933, "step": 1334 }, { "epoch": 0.2, "grad_norm": 3.230879748045431, "learning_rate": 1.8447685990193535e-05, "loss": 1.0526, "step": 1335 }, { "epoch": 0.2, "grad_norm": 2.85396414990987, "learning_rate": 1.8445032015407107e-05, "loss": 1.0069, "step": 1336 }, { "epoch": 0.2, "grad_norm": 2.9378848623087728, "learning_rate": 1.844237596506866e-05, "loss": 0.9669, "step": 1337 }, { "epoch": 0.2, "grad_norm": 2.854668930026993, "learning_rate": 1.8439717839830974e-05, "loss": 1.0146, "step": 1338 }, { "epoch": 0.2, "grad_norm": 3.099600996688908, "learning_rate": 1.843705764034735e-05, "loss": 1.0386, "step": 1339 }, { "epoch": 0.21, "grad_norm": 3.2855360587889026, "learning_rate": 1.843439536727158e-05, "loss": 0.9487, "step": 1340 }, { "epoch": 0.21, "grad_norm": 3.4579980415822007, "learning_rate": 1.843173102125798e-05, "loss": 0.9574, "step": 1341 }, { "epoch": 0.21, "grad_norm": 3.7273952322801116, "learning_rate": 1.842906460296137e-05, "loss": 0.9395, "step": 1342 }, { "epoch": 0.21, "grad_norm": 3.275683451454534, "learning_rate": 1.8426396113037085e-05, "loss": 0.9334, "step": 1343 }, { "epoch": 0.21, "grad_norm": 3.210881842639474, "learning_rate": 1.8423725552140964e-05, "loss": 0.986, "step": 1344 }, { "epoch": 0.21, "grad_norm": 34.68569610373674, "learning_rate": 1.842105292092935e-05, "loss": 1.1653, "step": 1345 }, { "epoch": 0.21, "grad_norm": 2.802505121467872, "learning_rate": 1.841837822005911e-05, "loss": 0.9074, "step": 1346 }, { "epoch": 0.21, "grad_norm": 3.1070745294033553, "learning_rate": 1.8415701450187603e-05, "loss": 1.0476, "step": 1347 }, { "epoch": 0.21, "grad_norm": 3.262380215220285, "learning_rate": 1.8413022611972707e-05, "loss": 1.0297, "step": 1348 }, { "epoch": 0.21, "grad_norm": 3.033952372865329, "learning_rate": 1.8410341706072805e-05, "loss": 1.015, "step": 1349 }, { "epoch": 0.21, "grad_norm": 3.323263789902725, "learning_rate": 1.8407658733146788e-05, "loss": 0.9336, "step": 1350 }, { "epoch": 0.21, "grad_norm": 3.3046967974580457, "learning_rate": 1.8404973693854056e-05, "loss": 0.9665, "step": 1351 }, { "epoch": 0.21, "grad_norm": 3.2788674106321425, "learning_rate": 1.840228658885452e-05, "loss": 1.001, "step": 1352 }, { "epoch": 0.21, "grad_norm": 3.1826543622998025, "learning_rate": 1.8399597418808588e-05, "loss": 0.9061, "step": 1353 }, { "epoch": 0.21, "grad_norm": 3.232063653180614, "learning_rate": 1.8396906184377185e-05, "loss": 0.9828, "step": 1354 }, { "epoch": 0.21, "grad_norm": 3.1539792259539645, "learning_rate": 1.8394212886221743e-05, "loss": 1.0339, "step": 1355 }, { "epoch": 0.21, "grad_norm": 3.2276005454751435, "learning_rate": 1.8391517525004202e-05, "loss": 0.9972, "step": 1356 }, { "epoch": 0.21, "grad_norm": 2.933260145178024, "learning_rate": 1.8388820101387e-05, "loss": 0.8192, "step": 1357 }, { "epoch": 0.21, "grad_norm": 3.302982115720018, "learning_rate": 1.8386120616033088e-05, "loss": 1.0183, "step": 1358 }, { "epoch": 0.21, "grad_norm": 3.6318377709444007, "learning_rate": 1.8383419069605928e-05, "loss": 1.0127, "step": 1359 }, { "epoch": 0.21, "grad_norm": 3.0740579352046966, "learning_rate": 1.838071546276948e-05, "loss": 0.9438, "step": 1360 }, { "epoch": 0.21, "grad_norm": 2.7634617730211786, "learning_rate": 1.837800979618822e-05, "loss": 0.9507, "step": 1361 }, { "epoch": 0.21, "grad_norm": 3.340650030000716, "learning_rate": 1.837530207052712e-05, "loss": 0.9877, "step": 1362 }, { "epoch": 0.21, "grad_norm": 3.1075200381421957, "learning_rate": 1.8372592286451668e-05, "loss": 1.0457, "step": 1363 }, { "epoch": 0.21, "grad_norm": 3.336523570898075, "learning_rate": 1.836988044462785e-05, "loss": 1.0177, "step": 1364 }, { "epoch": 0.21, "grad_norm": 3.314086950533041, "learning_rate": 1.836716654572216e-05, "loss": 1.0487, "step": 1365 }, { "epoch": 0.21, "grad_norm": 3.6581734234286944, "learning_rate": 1.83644505904016e-05, "loss": 0.9876, "step": 1366 }, { "epoch": 0.21, "grad_norm": 3.224387145206634, "learning_rate": 1.836173257933367e-05, "loss": 0.9458, "step": 1367 }, { "epoch": 0.21, "grad_norm": 3.140033745322203, "learning_rate": 1.835901251318639e-05, "loss": 0.9106, "step": 1368 }, { "epoch": 0.21, "grad_norm": 2.875829759401323, "learning_rate": 1.835629039262827e-05, "loss": 0.9946, "step": 1369 }, { "epoch": 0.21, "grad_norm": 7.450143819458855, "learning_rate": 1.8353566218328333e-05, "loss": 1.1555, "step": 1370 }, { "epoch": 0.21, "grad_norm": 3.442123035895518, "learning_rate": 1.8350839990956104e-05, "loss": 0.9642, "step": 1371 }, { "epoch": 0.21, "grad_norm": 3.1978975027659757, "learning_rate": 1.8348111711181614e-05, "loss": 0.8898, "step": 1372 }, { "epoch": 0.21, "grad_norm": 3.259845525408301, "learning_rate": 1.8345381379675397e-05, "loss": 1.0292, "step": 1373 }, { "epoch": 0.21, "grad_norm": 3.165231681661386, "learning_rate": 1.834264899710849e-05, "loss": 0.9839, "step": 1374 }, { "epoch": 0.21, "grad_norm": 2.895179041315747, "learning_rate": 1.8339914564152442e-05, "loss": 0.912, "step": 1375 }, { "epoch": 0.21, "grad_norm": 3.33805355430569, "learning_rate": 1.8337178081479292e-05, "loss": 1.0233, "step": 1376 }, { "epoch": 0.21, "grad_norm": 5.270798423226357, "learning_rate": 1.8334439549761596e-05, "loss": 1.0438, "step": 1377 }, { "epoch": 0.21, "grad_norm": 3.386659692187951, "learning_rate": 1.8331698969672405e-05, "loss": 1.0603, "step": 1378 }, { "epoch": 0.21, "grad_norm": 3.29273059399376, "learning_rate": 1.832895634188528e-05, "loss": 0.9176, "step": 1379 }, { "epoch": 0.21, "grad_norm": 3.2139646813203813, "learning_rate": 1.832621166707428e-05, "loss": 1.0601, "step": 1380 }, { "epoch": 0.21, "grad_norm": 3.591974463038327, "learning_rate": 1.8323464945913967e-05, "loss": 0.9984, "step": 1381 }, { "epoch": 0.21, "grad_norm": 3.0970045925072758, "learning_rate": 1.8320716179079414e-05, "loss": 0.9418, "step": 1382 }, { "epoch": 0.21, "grad_norm": 3.3020292537179117, "learning_rate": 1.831796536724619e-05, "loss": 0.977, "step": 1383 }, { "epoch": 0.21, "grad_norm": 3.1556072875509744, "learning_rate": 1.831521251109036e-05, "loss": 0.9942, "step": 1384 }, { "epoch": 0.21, "grad_norm": 3.4902091848804098, "learning_rate": 1.8312457611288506e-05, "loss": 1.038, "step": 1385 }, { "epoch": 0.21, "grad_norm": 4.785598451488263, "learning_rate": 1.8309700668517703e-05, "loss": 0.9436, "step": 1386 }, { "epoch": 0.21, "grad_norm": 3.0068519029089527, "learning_rate": 1.8306941683455528e-05, "loss": 1.0645, "step": 1387 }, { "epoch": 0.21, "grad_norm": 3.220487909077531, "learning_rate": 1.8304180656780062e-05, "loss": 0.9674, "step": 1388 }, { "epoch": 0.21, "grad_norm": 3.163243004696232, "learning_rate": 1.8301417589169898e-05, "loss": 1.0972, "step": 1389 }, { "epoch": 0.21, "grad_norm": 4.4175119770718325, "learning_rate": 1.8298652481304104e-05, "loss": 0.8804, "step": 1390 }, { "epoch": 0.21, "grad_norm": 3.820628451840013, "learning_rate": 1.829588533386228e-05, "loss": 1.0035, "step": 1391 }, { "epoch": 0.21, "grad_norm": 3.0343947926471664, "learning_rate": 1.8293116147524506e-05, "loss": 0.9804, "step": 1392 }, { "epoch": 0.21, "grad_norm": 3.48329283864859, "learning_rate": 1.829034492297137e-05, "loss": 1.0318, "step": 1393 }, { "epoch": 0.21, "grad_norm": 3.2509945928878103, "learning_rate": 1.8287571660883967e-05, "loss": 0.9648, "step": 1394 }, { "epoch": 0.21, "grad_norm": 3.3114408715390753, "learning_rate": 1.828479636194388e-05, "loss": 1.0345, "step": 1395 }, { "epoch": 0.21, "grad_norm": 3.026796233762158, "learning_rate": 1.8282019026833205e-05, "loss": 1.0983, "step": 1396 }, { "epoch": 0.21, "grad_norm": 3.3602743957091996, "learning_rate": 1.8279239656234537e-05, "loss": 0.9512, "step": 1397 }, { "epoch": 0.21, "grad_norm": 2.9546018486657926, "learning_rate": 1.8276458250830954e-05, "loss": 0.9391, "step": 1398 }, { "epoch": 0.21, "grad_norm": 2.8325729704271416, "learning_rate": 1.8273674811306056e-05, "loss": 1.0347, "step": 1399 }, { "epoch": 0.21, "grad_norm": 3.0823581307912495, "learning_rate": 1.8270889338343934e-05, "loss": 0.9359, "step": 1400 }, { "epoch": 0.21, "grad_norm": 3.1512300975836927, "learning_rate": 1.8268101832629177e-05, "loss": 0.9626, "step": 1401 }, { "epoch": 0.21, "grad_norm": 3.3099205822582833, "learning_rate": 1.826531229484688e-05, "loss": 0.8663, "step": 1402 }, { "epoch": 0.21, "grad_norm": 3.183353819847706, "learning_rate": 1.8262520725682628e-05, "loss": 1.008, "step": 1403 }, { "epoch": 0.21, "grad_norm": 3.123487813760808, "learning_rate": 1.8259727125822514e-05, "loss": 0.9991, "step": 1404 }, { "epoch": 0.22, "grad_norm": 3.1737631440311107, "learning_rate": 1.825693149595312e-05, "loss": 1.0189, "step": 1405 }, { "epoch": 0.22, "grad_norm": 3.2698968208545995, "learning_rate": 1.8254133836761547e-05, "loss": 1.0219, "step": 1406 }, { "epoch": 0.22, "grad_norm": 2.993678348890785, "learning_rate": 1.825133414893537e-05, "loss": 0.978, "step": 1407 }, { "epoch": 0.22, "grad_norm": 2.8830861585649417, "learning_rate": 1.8248532433162672e-05, "loss": 1.0617, "step": 1408 }, { "epoch": 0.22, "grad_norm": 3.5798528339110396, "learning_rate": 1.8245728690132044e-05, "loss": 1.0044, "step": 1409 }, { "epoch": 0.22, "grad_norm": 3.211449496027972, "learning_rate": 1.8242922920532566e-05, "loss": 0.9857, "step": 1410 }, { "epoch": 0.22, "grad_norm": 3.243617650417417, "learning_rate": 1.8240115125053816e-05, "loss": 0.9601, "step": 1411 }, { "epoch": 0.22, "grad_norm": 3.259960752667389, "learning_rate": 1.823730530438587e-05, "loss": 1.0182, "step": 1412 }, { "epoch": 0.22, "grad_norm": 3.3528873993989965, "learning_rate": 1.82344934592193e-05, "loss": 0.9254, "step": 1413 }, { "epoch": 0.22, "grad_norm": 3.3073550478896805, "learning_rate": 1.8231679590245185e-05, "loss": 0.9833, "step": 1414 }, { "epoch": 0.22, "grad_norm": 3.3731207113327057, "learning_rate": 1.8228863698155095e-05, "loss": 1.0441, "step": 1415 }, { "epoch": 0.22, "grad_norm": 3.4125271371405015, "learning_rate": 1.8226045783641094e-05, "loss": 0.9709, "step": 1416 }, { "epoch": 0.22, "grad_norm": 3.4114700891629424, "learning_rate": 1.8223225847395745e-05, "loss": 1.1116, "step": 1417 }, { "epoch": 0.22, "grad_norm": 3.302800754545404, "learning_rate": 1.822040389011212e-05, "loss": 1.0117, "step": 1418 }, { "epoch": 0.22, "grad_norm": 3.472788092538862, "learning_rate": 1.8217579912483758e-05, "loss": 0.9642, "step": 1419 }, { "epoch": 0.22, "grad_norm": 3.4919990970322954, "learning_rate": 1.821475391520473e-05, "loss": 0.9361, "step": 1420 }, { "epoch": 0.22, "grad_norm": 3.578796053182178, "learning_rate": 1.821192589896958e-05, "loss": 0.9699, "step": 1421 }, { "epoch": 0.22, "grad_norm": 2.943372444254199, "learning_rate": 1.8209095864473357e-05, "loss": 0.9645, "step": 1422 }, { "epoch": 0.22, "grad_norm": 2.957107692774212, "learning_rate": 1.82062638124116e-05, "loss": 1.1112, "step": 1423 }, { "epoch": 0.22, "grad_norm": 3.3304564555728913, "learning_rate": 1.8203429743480356e-05, "loss": 0.9292, "step": 1424 }, { "epoch": 0.22, "grad_norm": 3.4338118492304623, "learning_rate": 1.8200593658376152e-05, "loss": 1.0365, "step": 1425 }, { "epoch": 0.22, "grad_norm": 3.6715752225401705, "learning_rate": 1.819775555779602e-05, "loss": 0.9023, "step": 1426 }, { "epoch": 0.22, "grad_norm": 2.9897434792770268, "learning_rate": 1.8194915442437487e-05, "loss": 0.9809, "step": 1427 }, { "epoch": 0.22, "grad_norm": 3.2133451138736357, "learning_rate": 1.8192073312998574e-05, "loss": 1.0375, "step": 1428 }, { "epoch": 0.22, "grad_norm": 3.2079646767332197, "learning_rate": 1.8189229170177797e-05, "loss": 0.9501, "step": 1429 }, { "epoch": 0.22, "grad_norm": 3.5101854117888256, "learning_rate": 1.8186383014674164e-05, "loss": 0.9791, "step": 1430 }, { "epoch": 0.22, "grad_norm": 3.195330863399731, "learning_rate": 1.8183534847187184e-05, "loss": 0.9585, "step": 1431 }, { "epoch": 0.22, "grad_norm": 2.942786231263773, "learning_rate": 1.818068466841685e-05, "loss": 0.9436, "step": 1432 }, { "epoch": 0.22, "grad_norm": 3.380362306882057, "learning_rate": 1.8177832479063663e-05, "loss": 1.0471, "step": 1433 }, { "epoch": 0.22, "grad_norm": 2.9912397523219942, "learning_rate": 1.817497827982861e-05, "loss": 0.9627, "step": 1434 }, { "epoch": 0.22, "grad_norm": 3.131458645693922, "learning_rate": 1.817212207141317e-05, "loss": 1.0078, "step": 1435 }, { "epoch": 0.22, "grad_norm": 3.5044622984176814, "learning_rate": 1.816926385451932e-05, "loss": 0.9924, "step": 1436 }, { "epoch": 0.22, "grad_norm": 3.2764286835338834, "learning_rate": 1.8166403629849533e-05, "loss": 0.9193, "step": 1437 }, { "epoch": 0.22, "grad_norm": 2.955062497191095, "learning_rate": 1.8163541398106765e-05, "loss": 0.967, "step": 1438 }, { "epoch": 0.22, "grad_norm": 3.2063986435021348, "learning_rate": 1.8160677159994482e-05, "loss": 0.9361, "step": 1439 }, { "epoch": 0.22, "grad_norm": 3.3253753539110837, "learning_rate": 1.815781091621662e-05, "loss": 0.9221, "step": 1440 }, { "epoch": 0.22, "grad_norm": 3.147461882152192, "learning_rate": 1.815494266747764e-05, "loss": 1.0155, "step": 1441 }, { "epoch": 0.22, "grad_norm": 3.05539303881804, "learning_rate": 1.8152072414482456e-05, "loss": 1.026, "step": 1442 }, { "epoch": 0.22, "grad_norm": 3.0298394868604213, "learning_rate": 1.8149200157936512e-05, "loss": 1.0316, "step": 1443 }, { "epoch": 0.22, "grad_norm": 7.056299294425606, "learning_rate": 1.814632589854572e-05, "loss": 1.1486, "step": 1444 }, { "epoch": 0.22, "grad_norm": 2.9964798081837896, "learning_rate": 1.8143449637016495e-05, "loss": 0.9409, "step": 1445 }, { "epoch": 0.22, "grad_norm": 3.500281476121371, "learning_rate": 1.8140571374055737e-05, "loss": 0.9786, "step": 1446 }, { "epoch": 0.22, "grad_norm": 2.7525323910569925, "learning_rate": 1.8137691110370852e-05, "loss": 0.9384, "step": 1447 }, { "epoch": 0.22, "grad_norm": 3.005090782342863, "learning_rate": 1.813480884666972e-05, "loss": 1.0044, "step": 1448 }, { "epoch": 0.22, "grad_norm": 2.996105565791517, "learning_rate": 1.813192458366072e-05, "loss": 1.0387, "step": 1449 }, { "epoch": 0.22, "grad_norm": 3.2121612732948535, "learning_rate": 1.8129038322052727e-05, "loss": 0.9612, "step": 1450 }, { "epoch": 0.22, "grad_norm": 3.0154524615068325, "learning_rate": 1.8126150062555107e-05, "loss": 0.9509, "step": 1451 }, { "epoch": 0.22, "grad_norm": 3.124974923490825, "learning_rate": 1.81232598058777e-05, "loss": 0.9746, "step": 1452 }, { "epoch": 0.22, "grad_norm": 3.264530323861853, "learning_rate": 1.8120367552730865e-05, "loss": 0.9343, "step": 1453 }, { "epoch": 0.22, "grad_norm": 3.394005356454698, "learning_rate": 1.8117473303825426e-05, "loss": 1.0168, "step": 1454 }, { "epoch": 0.22, "grad_norm": 3.0760671123580106, "learning_rate": 1.811457705987271e-05, "loss": 1.067, "step": 1455 }, { "epoch": 0.22, "grad_norm": 3.196152023485417, "learning_rate": 1.811167882158454e-05, "loss": 0.9804, "step": 1456 }, { "epoch": 0.22, "grad_norm": 3.5596382958999953, "learning_rate": 1.8108778589673216e-05, "loss": 1.0635, "step": 1457 }, { "epoch": 0.22, "grad_norm": 3.116134009243598, "learning_rate": 1.810587636485153e-05, "loss": 1.1408, "step": 1458 }, { "epoch": 0.22, "grad_norm": 3.0966651802438867, "learning_rate": 1.8102972147832775e-05, "loss": 0.9698, "step": 1459 }, { "epoch": 0.22, "grad_norm": 2.9877198835598753, "learning_rate": 1.8100065939330716e-05, "loss": 0.9354, "step": 1460 }, { "epoch": 0.22, "grad_norm": 3.0404801902248355, "learning_rate": 1.809715774005963e-05, "loss": 1.0685, "step": 1461 }, { "epoch": 0.22, "grad_norm": 2.948143128537915, "learning_rate": 1.8094247550734262e-05, "loss": 1.0719, "step": 1462 }, { "epoch": 0.22, "grad_norm": 3.066447715290956, "learning_rate": 1.8091335372069857e-05, "loss": 0.9708, "step": 1463 }, { "epoch": 0.22, "grad_norm": 3.115030578189605, "learning_rate": 1.8088421204782153e-05, "loss": 1.0377, "step": 1464 }, { "epoch": 0.22, "grad_norm": 3.1425095469293893, "learning_rate": 1.808550504958736e-05, "loss": 0.9705, "step": 1465 }, { "epoch": 0.22, "grad_norm": 3.244217587585244, "learning_rate": 1.8082586907202194e-05, "loss": 0.9619, "step": 1466 }, { "epoch": 0.22, "grad_norm": 3.20412918400006, "learning_rate": 1.8079666778343853e-05, "loss": 1.0447, "step": 1467 }, { "epoch": 0.22, "grad_norm": 4.459745892769258, "learning_rate": 1.8076744663730016e-05, "loss": 0.9423, "step": 1468 }, { "epoch": 0.22, "grad_norm": 3.138069027731141, "learning_rate": 1.8073820564078865e-05, "loss": 1.0486, "step": 1469 }, { "epoch": 0.23, "grad_norm": 3.425622599914716, "learning_rate": 1.8070894480109056e-05, "loss": 0.8424, "step": 1470 }, { "epoch": 0.23, "grad_norm": 3.2962585539079887, "learning_rate": 1.806796641253974e-05, "loss": 1.0893, "step": 1471 }, { "epoch": 0.23, "grad_norm": 3.2303926782782053, "learning_rate": 1.8065036362090555e-05, "loss": 0.975, "step": 1472 }, { "epoch": 0.23, "grad_norm": 9.037522825764029, "learning_rate": 1.806210432948163e-05, "loss": 1.1881, "step": 1473 }, { "epoch": 0.23, "grad_norm": 3.288380690108407, "learning_rate": 1.8059170315433565e-05, "loss": 0.9964, "step": 1474 }, { "epoch": 0.23, "grad_norm": 2.920521462332691, "learning_rate": 1.8056234320667465e-05, "loss": 0.9707, "step": 1475 }, { "epoch": 0.23, "grad_norm": 5.257974512775954, "learning_rate": 1.8053296345904913e-05, "loss": 1.1267, "step": 1476 }, { "epoch": 0.23, "grad_norm": 3.307161089261329, "learning_rate": 1.8050356391867988e-05, "loss": 0.9739, "step": 1477 }, { "epoch": 0.23, "grad_norm": 3.24739860932814, "learning_rate": 1.804741445927924e-05, "loss": 0.8637, "step": 1478 }, { "epoch": 0.23, "grad_norm": 2.9989659358800385, "learning_rate": 1.8044470548861715e-05, "loss": 0.9047, "step": 1479 }, { "epoch": 0.23, "grad_norm": 3.10437368249323, "learning_rate": 1.8041524661338943e-05, "loss": 0.9233, "step": 1480 }, { "epoch": 0.23, "grad_norm": 2.794739597626496, "learning_rate": 1.8038576797434945e-05, "loss": 0.827, "step": 1481 }, { "epoch": 0.23, "grad_norm": 3.1128014833427837, "learning_rate": 1.803562695787422e-05, "loss": 0.9067, "step": 1482 }, { "epoch": 0.23, "grad_norm": 2.9988136690702203, "learning_rate": 1.8032675143381756e-05, "loss": 0.9164, "step": 1483 }, { "epoch": 0.23, "grad_norm": 3.011789954198862, "learning_rate": 1.8029721354683024e-05, "loss": 0.8821, "step": 1484 }, { "epoch": 0.23, "grad_norm": 3.1678259833469715, "learning_rate": 1.802676559250399e-05, "loss": 0.9442, "step": 1485 }, { "epoch": 0.23, "grad_norm": 3.186525158553192, "learning_rate": 1.8023807857571092e-05, "loss": 0.9632, "step": 1486 }, { "epoch": 0.23, "grad_norm": 3.397793056047527, "learning_rate": 1.8020848150611257e-05, "loss": 0.9299, "step": 1487 }, { "epoch": 0.23, "grad_norm": 3.1337686121653165, "learning_rate": 1.8017886472351898e-05, "loss": 1.0272, "step": 1488 }, { "epoch": 0.23, "grad_norm": 3.2023763342435103, "learning_rate": 1.8014922823520918e-05, "loss": 1.0095, "step": 1489 }, { "epoch": 0.23, "grad_norm": 3.179613140976094, "learning_rate": 1.8011957204846694e-05, "loss": 0.9999, "step": 1490 }, { "epoch": 0.23, "grad_norm": 2.9569570559790943, "learning_rate": 1.8008989617058088e-05, "loss": 0.9663, "step": 1491 }, { "epoch": 0.23, "grad_norm": 2.819221986668142, "learning_rate": 1.800602006088446e-05, "loss": 0.9449, "step": 1492 }, { "epoch": 0.23, "grad_norm": 3.0987259678730616, "learning_rate": 1.8003048537055634e-05, "loss": 0.9663, "step": 1493 }, { "epoch": 0.23, "grad_norm": 3.3105056929517525, "learning_rate": 1.8000075046301937e-05, "loss": 0.971, "step": 1494 }, { "epoch": 0.23, "grad_norm": 3.0776614929117736, "learning_rate": 1.7997099589354162e-05, "loss": 0.9827, "step": 1495 }, { "epoch": 0.23, "grad_norm": 2.808460903546118, "learning_rate": 1.799412216694359e-05, "loss": 0.8083, "step": 1496 }, { "epoch": 0.23, "grad_norm": 4.12190977184157, "learning_rate": 1.7991142779801997e-05, "loss": 0.9388, "step": 1497 }, { "epoch": 0.23, "grad_norm": 3.4668326908121068, "learning_rate": 1.798816142866163e-05, "loss": 1.0586, "step": 1498 }, { "epoch": 0.23, "grad_norm": 3.0588969927516345, "learning_rate": 1.798517811425522e-05, "loss": 0.9958, "step": 1499 }, { "epoch": 0.23, "grad_norm": 3.0271086429161507, "learning_rate": 1.798219283731598e-05, "loss": 1.0154, "step": 1500 }, { "epoch": 0.23, "grad_norm": 3.113264536411894, "learning_rate": 1.797920559857761e-05, "loss": 0.9389, "step": 1501 }, { "epoch": 0.23, "grad_norm": 3.181762870336013, "learning_rate": 1.7976216398774292e-05, "loss": 1.0066, "step": 1502 }, { "epoch": 0.23, "grad_norm": 2.957030913988016, "learning_rate": 1.7973225238640682e-05, "loss": 1.0518, "step": 1503 }, { "epoch": 0.23, "grad_norm": 3.1225430695720218, "learning_rate": 1.7970232118911927e-05, "loss": 0.954, "step": 1504 }, { "epoch": 0.23, "grad_norm": 2.8346355377657373, "learning_rate": 1.796723704032365e-05, "loss": 1.0051, "step": 1505 }, { "epoch": 0.23, "grad_norm": 3.222418799430063, "learning_rate": 1.7964240003611958e-05, "loss": 0.9165, "step": 1506 }, { "epoch": 0.23, "grad_norm": 2.943155474907022, "learning_rate": 1.7961241009513436e-05, "loss": 0.9589, "step": 1507 }, { "epoch": 0.23, "grad_norm": 10.091694549571042, "learning_rate": 1.795824005876516e-05, "loss": 1.2324, "step": 1508 }, { "epoch": 0.23, "grad_norm": 3.289114095658659, "learning_rate": 1.7955237152104673e-05, "loss": 0.9287, "step": 1509 }, { "epoch": 0.23, "grad_norm": 3.59972267883229, "learning_rate": 1.7952232290270007e-05, "loss": 0.9872, "step": 1510 }, { "epoch": 0.23, "grad_norm": 3.5090128528213906, "learning_rate": 1.794922547399967e-05, "loss": 0.9683, "step": 1511 }, { "epoch": 0.23, "grad_norm": 3.1106921697068057, "learning_rate": 1.7946216704032662e-05, "loss": 0.9422, "step": 1512 }, { "epoch": 0.23, "grad_norm": 3.0391493782354364, "learning_rate": 1.7943205981108442e-05, "loss": 0.9405, "step": 1513 }, { "epoch": 0.23, "grad_norm": 2.8944574754838523, "learning_rate": 1.794019330596697e-05, "loss": 0.9658, "step": 1514 }, { "epoch": 0.23, "grad_norm": 3.0285972606047755, "learning_rate": 1.7937178679348675e-05, "loss": 0.9986, "step": 1515 }, { "epoch": 0.23, "grad_norm": 3.0896134059161215, "learning_rate": 1.7934162101994464e-05, "loss": 0.8707, "step": 1516 }, { "epoch": 0.23, "grad_norm": 3.3401810543095163, "learning_rate": 1.7931143574645735e-05, "loss": 1.0286, "step": 1517 }, { "epoch": 0.23, "grad_norm": 3.711419672994701, "learning_rate": 1.7928123098044353e-05, "loss": 0.9151, "step": 1518 }, { "epoch": 0.23, "grad_norm": 3.2190767791084656, "learning_rate": 1.792510067293266e-05, "loss": 1.0052, "step": 1519 }, { "epoch": 0.23, "grad_norm": 3.570504511484273, "learning_rate": 1.7922076300053493e-05, "loss": 0.9412, "step": 1520 }, { "epoch": 0.23, "grad_norm": 3.212523605322943, "learning_rate": 1.7919049980150155e-05, "loss": 1.0195, "step": 1521 }, { "epoch": 0.23, "grad_norm": 3.3752111549249815, "learning_rate": 1.791602171396643e-05, "loss": 0.9564, "step": 1522 }, { "epoch": 0.23, "grad_norm": 2.8989772659100908, "learning_rate": 1.7912991502246578e-05, "loss": 0.9777, "step": 1523 }, { "epoch": 0.23, "grad_norm": 3.0079429607041592, "learning_rate": 1.7909959345735346e-05, "loss": 0.9082, "step": 1524 }, { "epoch": 0.23, "grad_norm": 3.3863097419634585, "learning_rate": 1.790692524517795e-05, "loss": 0.9536, "step": 1525 }, { "epoch": 0.23, "grad_norm": 3.2678279877371126, "learning_rate": 1.7903889201320083e-05, "loss": 0.9782, "step": 1526 }, { "epoch": 0.23, "grad_norm": 3.219374266336708, "learning_rate": 1.7900851214907925e-05, "loss": 0.9392, "step": 1527 }, { "epoch": 0.23, "grad_norm": 3.04359995207335, "learning_rate": 1.789781128668813e-05, "loss": 1.068, "step": 1528 }, { "epoch": 0.23, "grad_norm": 3.190840020564337, "learning_rate": 1.789476941740782e-05, "loss": 0.9424, "step": 1529 }, { "epoch": 0.23, "grad_norm": 4.107360791628095, "learning_rate": 1.7891725607814602e-05, "loss": 0.9268, "step": 1530 }, { "epoch": 0.23, "grad_norm": 2.8123151853339428, "learning_rate": 1.7888679858656566e-05, "loss": 0.909, "step": 1531 }, { "epoch": 0.23, "grad_norm": 2.9281300713797394, "learning_rate": 1.7885632170682266e-05, "loss": 0.9875, "step": 1532 }, { "epoch": 0.23, "grad_norm": 3.1581987594287066, "learning_rate": 1.7882582544640736e-05, "loss": 0.8797, "step": 1533 }, { "epoch": 0.23, "grad_norm": 2.955887862870306, "learning_rate": 1.7879530981281492e-05, "loss": 0.956, "step": 1534 }, { "epoch": 0.23, "grad_norm": 2.9509542667470274, "learning_rate": 1.7876477481354527e-05, "loss": 1.0239, "step": 1535 }, { "epoch": 0.24, "grad_norm": 2.543176942825485, "learning_rate": 1.78734220456103e-05, "loss": 0.8289, "step": 1536 }, { "epoch": 0.24, "grad_norm": 3.1781665264428587, "learning_rate": 1.787036467479975e-05, "loss": 0.9332, "step": 1537 }, { "epoch": 0.24, "grad_norm": 3.009086672085074, "learning_rate": 1.7867305369674297e-05, "loss": 1.0283, "step": 1538 }, { "epoch": 0.24, "grad_norm": 3.001359683433981, "learning_rate": 1.7864244130985827e-05, "loss": 0.9153, "step": 1539 }, { "epoch": 0.24, "grad_norm": 3.5693255061400704, "learning_rate": 1.7861180959486718e-05, "loss": 1.0207, "step": 1540 }, { "epoch": 0.24, "grad_norm": 3.1355580568313433, "learning_rate": 1.7858115855929797e-05, "loss": 0.8707, "step": 1541 }, { "epoch": 0.24, "grad_norm": 7.535492268525197, "learning_rate": 1.785504882106839e-05, "loss": 1.1984, "step": 1542 }, { "epoch": 0.24, "grad_norm": 3.2349686394576134, "learning_rate": 1.7851979855656292e-05, "loss": 1.023, "step": 1543 }, { "epoch": 0.24, "grad_norm": 3.128701067205983, "learning_rate": 1.784890896044776e-05, "loss": 0.931, "step": 1544 }, { "epoch": 0.24, "grad_norm": 3.1756175385193623, "learning_rate": 1.784583613619753e-05, "loss": 1.0761, "step": 1545 }, { "epoch": 0.24, "grad_norm": 3.2455472161944567, "learning_rate": 1.784276138366083e-05, "loss": 0.9923, "step": 1546 }, { "epoch": 0.24, "grad_norm": 3.372209180487672, "learning_rate": 1.7839684703593333e-05, "loss": 0.9046, "step": 1547 }, { "epoch": 0.24, "grad_norm": 3.7337452784886636, "learning_rate": 1.7836606096751215e-05, "loss": 0.9483, "step": 1548 }, { "epoch": 0.24, "grad_norm": 4.047376458044384, "learning_rate": 1.78335255638911e-05, "loss": 0.9749, "step": 1549 }, { "epoch": 0.24, "grad_norm": 2.9377508290351053, "learning_rate": 1.7830443105770104e-05, "loss": 0.9275, "step": 1550 }, { "epoch": 0.24, "grad_norm": 3.3213718081164423, "learning_rate": 1.7827358723145803e-05, "loss": 1.0016, "step": 1551 }, { "epoch": 0.24, "grad_norm": 3.125648807004082, "learning_rate": 1.7824272416776255e-05, "loss": 0.9499, "step": 1552 }, { "epoch": 0.24, "grad_norm": 2.992772454537541, "learning_rate": 1.7821184187419986e-05, "loss": 1.0947, "step": 1553 }, { "epoch": 0.24, "grad_norm": 3.3498984461989743, "learning_rate": 1.7818094035835997e-05, "loss": 0.977, "step": 1554 }, { "epoch": 0.24, "grad_norm": 9.808823836859759, "learning_rate": 1.781500196278376e-05, "loss": 1.1493, "step": 1555 }, { "epoch": 0.24, "grad_norm": 2.7414312834078487, "learning_rate": 1.7811907969023226e-05, "loss": 0.9074, "step": 1556 }, { "epoch": 0.24, "grad_norm": 3.096725210487838, "learning_rate": 1.7808812055314803e-05, "loss": 1.0496, "step": 1557 }, { "epoch": 0.24, "grad_norm": 3.1454246260790453, "learning_rate": 1.7805714222419383e-05, "loss": 0.9642, "step": 1558 }, { "epoch": 0.24, "grad_norm": 4.39456934603716, "learning_rate": 1.7802614471098325e-05, "loss": 0.9703, "step": 1559 }, { "epoch": 0.24, "grad_norm": 2.8039173071331853, "learning_rate": 1.7799512802113463e-05, "loss": 0.8879, "step": 1560 }, { "epoch": 0.24, "grad_norm": 2.9674655561333063, "learning_rate": 1.77964092162271e-05, "loss": 1.0401, "step": 1561 }, { "epoch": 0.24, "grad_norm": 2.9232148337048396, "learning_rate": 1.7793303714202012e-05, "loss": 0.941, "step": 1562 }, { "epoch": 0.24, "grad_norm": 2.9934636053586705, "learning_rate": 1.7790196296801443e-05, "loss": 0.9383, "step": 1563 }, { "epoch": 0.24, "grad_norm": 2.9223907005030956, "learning_rate": 1.7787086964789107e-05, "loss": 0.9159, "step": 1564 }, { "epoch": 0.24, "grad_norm": 3.032596903774553, "learning_rate": 1.7783975718929188e-05, "loss": 0.8643, "step": 1565 }, { "epoch": 0.24, "grad_norm": 2.829783049588635, "learning_rate": 1.7780862559986353e-05, "loss": 0.9304, "step": 1566 }, { "epoch": 0.24, "grad_norm": 3.371369832138475, "learning_rate": 1.777774748872572e-05, "loss": 1.0539, "step": 1567 }, { "epoch": 0.24, "grad_norm": 3.098014467231705, "learning_rate": 1.7774630505912894e-05, "loss": 1.0586, "step": 1568 }, { "epoch": 0.24, "grad_norm": 6.854963050985832, "learning_rate": 1.7771511612313932e-05, "loss": 1.0926, "step": 1569 }, { "epoch": 0.24, "grad_norm": 3.03748855360975, "learning_rate": 1.7768390808695376e-05, "loss": 1.0332, "step": 1570 }, { "epoch": 0.24, "grad_norm": 3.2395041609630377, "learning_rate": 1.7765268095824234e-05, "loss": 0.8884, "step": 1571 }, { "epoch": 0.24, "grad_norm": 3.5766715300588996, "learning_rate": 1.776214347446798e-05, "loss": 1.0497, "step": 1572 }, { "epoch": 0.24, "grad_norm": 3.115277159581461, "learning_rate": 1.7759016945394554e-05, "loss": 0.9077, "step": 1573 }, { "epoch": 0.24, "grad_norm": 2.9805465907515285, "learning_rate": 1.7755888509372378e-05, "loss": 0.9555, "step": 1574 }, { "epoch": 0.24, "grad_norm": 2.8237610297834492, "learning_rate": 1.7752758167170327e-05, "loss": 0.8722, "step": 1575 }, { "epoch": 0.24, "grad_norm": 3.346858502108061, "learning_rate": 1.774962591955775e-05, "loss": 0.9513, "step": 1576 }, { "epoch": 0.24, "grad_norm": 6.972990157972103, "learning_rate": 1.774649176730447e-05, "loss": 1.0721, "step": 1577 }, { "epoch": 0.24, "grad_norm": 3.475587952247821, "learning_rate": 1.7743355711180772e-05, "loss": 1.0924, "step": 1578 }, { "epoch": 0.24, "grad_norm": 3.3373101780404073, "learning_rate": 1.774021775195741e-05, "loss": 0.9527, "step": 1579 }, { "epoch": 0.24, "grad_norm": 3.680026816225568, "learning_rate": 1.773707789040561e-05, "loss": 1.0351, "step": 1580 }, { "epoch": 0.24, "grad_norm": 3.20231272940428, "learning_rate": 1.7733936127297053e-05, "loss": 0.9025, "step": 1581 }, { "epoch": 0.24, "grad_norm": 3.135934758066214, "learning_rate": 1.7730792463403907e-05, "loss": 0.9604, "step": 1582 }, { "epoch": 0.24, "grad_norm": 3.048731265002102, "learning_rate": 1.772764689949879e-05, "loss": 0.9117, "step": 1583 }, { "epoch": 0.24, "grad_norm": 3.067612401660131, "learning_rate": 1.7724499436354796e-05, "loss": 0.9069, "step": 1584 }, { "epoch": 0.24, "grad_norm": 3.3240696511439345, "learning_rate": 1.7721350074745483e-05, "loss": 0.8682, "step": 1585 }, { "epoch": 0.24, "grad_norm": 3.2752756682214947, "learning_rate": 1.771819881544487e-05, "loss": 1.0783, "step": 1586 }, { "epoch": 0.24, "grad_norm": 2.882160412513278, "learning_rate": 1.771504565922746e-05, "loss": 0.9463, "step": 1587 }, { "epoch": 0.24, "grad_norm": 2.9978510227502273, "learning_rate": 1.7711890606868202e-05, "loss": 0.9297, "step": 1588 }, { "epoch": 0.24, "grad_norm": 2.9646358151861225, "learning_rate": 1.770873365914252e-05, "loss": 0.923, "step": 1589 }, { "epoch": 0.24, "grad_norm": 3.1091547510176922, "learning_rate": 1.77055748168263e-05, "loss": 0.9384, "step": 1590 }, { "epoch": 0.24, "grad_norm": 3.1877979522883124, "learning_rate": 1.7702414080695907e-05, "loss": 1.0714, "step": 1591 }, { "epoch": 0.24, "grad_norm": 3.2640181587855484, "learning_rate": 1.7699251451528152e-05, "loss": 1.0194, "step": 1592 }, { "epoch": 0.24, "grad_norm": 2.925145799537389, "learning_rate": 1.7696086930100323e-05, "loss": 0.8909, "step": 1593 }, { "epoch": 0.24, "grad_norm": 3.133756814413861, "learning_rate": 1.7692920517190175e-05, "loss": 0.9, "step": 1594 }, { "epoch": 0.24, "grad_norm": 3.2710171605177805, "learning_rate": 1.768975221357592e-05, "loss": 0.9672, "step": 1595 }, { "epoch": 0.24, "grad_norm": 3.068629248668608, "learning_rate": 1.7686582020036234e-05, "loss": 0.9769, "step": 1596 }, { "epoch": 0.24, "grad_norm": 2.830114814388996, "learning_rate": 1.7683409937350267e-05, "loss": 0.9226, "step": 1597 }, { "epoch": 0.24, "grad_norm": 3.052722151159689, "learning_rate": 1.768023596629763e-05, "loss": 0.9782, "step": 1598 }, { "epoch": 0.24, "grad_norm": 3.181907971299097, "learning_rate": 1.7677060107658387e-05, "loss": 0.94, "step": 1599 }, { "epoch": 0.24, "grad_norm": 3.0584100002745003, "learning_rate": 1.7673882362213085e-05, "loss": 0.9347, "step": 1600 }, { "epoch": 0.25, "grad_norm": 3.0730524882941292, "learning_rate": 1.7670702730742722e-05, "loss": 0.9766, "step": 1601 }, { "epoch": 0.25, "grad_norm": 2.743519182081731, "learning_rate": 1.7667521214028757e-05, "loss": 0.8648, "step": 1602 }, { "epoch": 0.25, "grad_norm": 3.0055379073044355, "learning_rate": 1.7664337812853122e-05, "loss": 0.9404, "step": 1603 }, { "epoch": 0.25, "grad_norm": 3.1226930811580136, "learning_rate": 1.766115252799821e-05, "loss": 1.0251, "step": 1604 }, { "epoch": 0.25, "grad_norm": 3.2624488224353567, "learning_rate": 1.7657965360246867e-05, "loss": 0.9208, "step": 1605 }, { "epoch": 0.25, "grad_norm": 3.0925944435585944, "learning_rate": 1.7654776310382417e-05, "loss": 0.8821, "step": 1606 }, { "epoch": 0.25, "grad_norm": 3.3295149139126234, "learning_rate": 1.7651585379188635e-05, "loss": 1.0782, "step": 1607 }, { "epoch": 0.25, "grad_norm": 2.903090564857671, "learning_rate": 1.7648392567449764e-05, "loss": 0.9306, "step": 1608 }, { "epoch": 0.25, "grad_norm": 2.981070901378658, "learning_rate": 1.7645197875950507e-05, "loss": 1.0098, "step": 1609 }, { "epoch": 0.25, "grad_norm": 3.2721472207784768, "learning_rate": 1.764200130547603e-05, "loss": 1.0286, "step": 1610 }, { "epoch": 0.25, "grad_norm": 3.0694378697061078, "learning_rate": 1.763880285681196e-05, "loss": 1.0652, "step": 1611 }, { "epoch": 0.25, "grad_norm": 3.6228799725758836, "learning_rate": 1.7635602530744387e-05, "loss": 0.9625, "step": 1612 }, { "epoch": 0.25, "grad_norm": 7.298349477713538, "learning_rate": 1.763240032805986e-05, "loss": 1.1157, "step": 1613 }, { "epoch": 0.25, "grad_norm": 3.4453013308981912, "learning_rate": 1.762919624954539e-05, "loss": 1.0214, "step": 1614 }, { "epoch": 0.25, "grad_norm": 3.025070577653514, "learning_rate": 1.762599029598845e-05, "loss": 0.9589, "step": 1615 }, { "epoch": 0.25, "grad_norm": 3.233682546527499, "learning_rate": 1.7622782468176974e-05, "loss": 1.036, "step": 1616 }, { "epoch": 0.25, "grad_norm": 3.211561445588491, "learning_rate": 1.761957276689936e-05, "loss": 0.9992, "step": 1617 }, { "epoch": 0.25, "grad_norm": 3.277215642818311, "learning_rate": 1.761636119294446e-05, "loss": 0.9019, "step": 1618 }, { "epoch": 0.25, "grad_norm": 6.402280666544251, "learning_rate": 1.761314774710158e-05, "loss": 1.0616, "step": 1619 }, { "epoch": 0.25, "grad_norm": 3.736470200541887, "learning_rate": 1.760993243016051e-05, "loss": 0.92, "step": 1620 }, { "epoch": 0.25, "grad_norm": 2.940226552076836, "learning_rate": 1.7606715242911468e-05, "loss": 0.8812, "step": 1621 }, { "epoch": 0.25, "grad_norm": 3.1723309239716233, "learning_rate": 1.7603496186145168e-05, "loss": 0.8744, "step": 1622 }, { "epoch": 0.25, "grad_norm": 3.191071987973035, "learning_rate": 1.7600275260652746e-05, "loss": 0.9277, "step": 1623 }, { "epoch": 0.25, "grad_norm": 3.239417253255005, "learning_rate": 1.7597052467225827e-05, "loss": 0.9204, "step": 1624 }, { "epoch": 0.25, "grad_norm": 3.192265372865029, "learning_rate": 1.7593827806656477e-05, "loss": 0.9358, "step": 1625 }, { "epoch": 0.25, "grad_norm": 3.0184333324198205, "learning_rate": 1.7590601279737232e-05, "loss": 0.9797, "step": 1626 }, { "epoch": 0.25, "grad_norm": 3.279109165748821, "learning_rate": 1.7587372887261077e-05, "loss": 1.0752, "step": 1627 }, { "epoch": 0.25, "grad_norm": 2.6953986178070153, "learning_rate": 1.7584142630021458e-05, "loss": 0.9401, "step": 1628 }, { "epoch": 0.25, "grad_norm": 3.172191903175026, "learning_rate": 1.758091050881229e-05, "loss": 1.0515, "step": 1629 }, { "epoch": 0.25, "grad_norm": 3.1727497820346677, "learning_rate": 1.7577676524427934e-05, "loss": 1.0884, "step": 1630 }, { "epoch": 0.25, "grad_norm": 3.1850780747179583, "learning_rate": 1.7574440677663212e-05, "loss": 1.1565, "step": 1631 }, { "epoch": 0.25, "grad_norm": 3.02776283992973, "learning_rate": 1.7571202969313403e-05, "loss": 1.0804, "step": 1632 }, { "epoch": 0.25, "grad_norm": 4.1377205276174385, "learning_rate": 1.7567963400174245e-05, "loss": 0.9776, "step": 1633 }, { "epoch": 0.25, "grad_norm": 3.2465903735570545, "learning_rate": 1.7564721971041937e-05, "loss": 0.8512, "step": 1634 }, { "epoch": 0.25, "grad_norm": 3.453705119144487, "learning_rate": 1.756147868271313e-05, "loss": 0.9562, "step": 1635 }, { "epoch": 0.25, "grad_norm": 3.1922476204360235, "learning_rate": 1.7558233535984933e-05, "loss": 0.9605, "step": 1636 }, { "epoch": 0.25, "grad_norm": 3.700710967013734, "learning_rate": 1.755498653165491e-05, "loss": 1.0348, "step": 1637 }, { "epoch": 0.25, "grad_norm": 3.2615697733633113, "learning_rate": 1.7551737670521084e-05, "loss": 1.0719, "step": 1638 }, { "epoch": 0.25, "grad_norm": 3.1274414203254888, "learning_rate": 1.7548486953381937e-05, "loss": 0.9591, "step": 1639 }, { "epoch": 0.25, "grad_norm": 3.1152780749906666, "learning_rate": 1.75452343810364e-05, "loss": 0.9675, "step": 1640 }, { "epoch": 0.25, "grad_norm": 3.0035425673242537, "learning_rate": 1.7541979954283864e-05, "loss": 1.0109, "step": 1641 }, { "epoch": 0.25, "grad_norm": 3.077978228296154, "learning_rate": 1.7538723673924185e-05, "loss": 0.922, "step": 1642 }, { "epoch": 0.25, "grad_norm": 2.739028913696504, "learning_rate": 1.7535465540757654e-05, "loss": 0.8579, "step": 1643 }, { "epoch": 0.25, "grad_norm": 3.3548693985388605, "learning_rate": 1.7532205555585032e-05, "loss": 0.8721, "step": 1644 }, { "epoch": 0.25, "grad_norm": 3.1780558911413643, "learning_rate": 1.7528943719207536e-05, "loss": 0.9789, "step": 1645 }, { "epoch": 0.25, "grad_norm": 8.872343924062486, "learning_rate": 1.752568003242683e-05, "loss": 1.1482, "step": 1646 }, { "epoch": 0.25, "grad_norm": 2.722081445165806, "learning_rate": 1.7522414496045037e-05, "loss": 0.9274, "step": 1647 }, { "epoch": 0.25, "grad_norm": 3.109960299502892, "learning_rate": 1.7519147110864736e-05, "loss": 1.0573, "step": 1648 }, { "epoch": 0.25, "grad_norm": 2.9004201318080516, "learning_rate": 1.7515877877688957e-05, "loss": 0.912, "step": 1649 }, { "epoch": 0.25, "grad_norm": 3.069111610446601, "learning_rate": 1.7512606797321185e-05, "loss": 1.1247, "step": 1650 }, { "epoch": 0.25, "grad_norm": 2.716118441546312, "learning_rate": 1.7509333870565364e-05, "loss": 0.9329, "step": 1651 }, { "epoch": 0.25, "grad_norm": 3.1907520999620735, "learning_rate": 1.7506059098225884e-05, "loss": 0.9952, "step": 1652 }, { "epoch": 0.25, "grad_norm": 3.0379065945927852, "learning_rate": 1.750278248110759e-05, "loss": 0.9498, "step": 1653 }, { "epoch": 0.25, "grad_norm": 3.037290982411241, "learning_rate": 1.749950402001579e-05, "loss": 0.9708, "step": 1654 }, { "epoch": 0.25, "grad_norm": 3.107830375806654, "learning_rate": 1.749622371575623e-05, "loss": 1.0052, "step": 1655 }, { "epoch": 0.25, "grad_norm": 2.839118227383374, "learning_rate": 1.749294156913512e-05, "loss": 0.9887, "step": 1656 }, { "epoch": 0.25, "grad_norm": 3.296029959912079, "learning_rate": 1.748965758095912e-05, "loss": 0.9337, "step": 1657 }, { "epoch": 0.25, "grad_norm": 3.0948027241708327, "learning_rate": 1.7486371752035346e-05, "loss": 0.9437, "step": 1658 }, { "epoch": 0.25, "grad_norm": 3.095667626766412, "learning_rate": 1.7483084083171353e-05, "loss": 0.9261, "step": 1659 }, { "epoch": 0.25, "grad_norm": 2.9205509097298443, "learning_rate": 1.7479794575175167e-05, "loss": 0.9924, "step": 1660 }, { "epoch": 0.25, "grad_norm": 2.982852037709161, "learning_rate": 1.7476503228855254e-05, "loss": 0.9751, "step": 1661 }, { "epoch": 0.25, "grad_norm": 3.0325740886574923, "learning_rate": 1.747321004502053e-05, "loss": 0.9428, "step": 1662 }, { "epoch": 0.25, "grad_norm": 9.321678349275457, "learning_rate": 1.746991502448037e-05, "loss": 1.2062, "step": 1663 }, { "epoch": 0.25, "grad_norm": 3.207975724754327, "learning_rate": 1.7466618168044604e-05, "loss": 0.9685, "step": 1664 }, { "epoch": 0.25, "grad_norm": 2.972145230828689, "learning_rate": 1.74633194765235e-05, "loss": 0.9395, "step": 1665 }, { "epoch": 0.26, "grad_norm": 3.0197396910817815, "learning_rate": 1.746001895072778e-05, "loss": 1.0408, "step": 1666 }, { "epoch": 0.26, "grad_norm": 3.391534577184481, "learning_rate": 1.7456716591468632e-05, "loss": 0.8894, "step": 1667 }, { "epoch": 0.26, "grad_norm": 2.6510303451802515, "learning_rate": 1.7453412399557673e-05, "loss": 0.9196, "step": 1668 }, { "epoch": 0.26, "grad_norm": 5.69220613921482, "learning_rate": 1.7450106375806988e-05, "loss": 1.0706, "step": 1669 }, { "epoch": 0.26, "grad_norm": 3.4899235907480253, "learning_rate": 1.7446798521029104e-05, "loss": 0.9742, "step": 1670 }, { "epoch": 0.26, "grad_norm": 3.403430566508585, "learning_rate": 1.744348883603699e-05, "loss": 1.0179, "step": 1671 }, { "epoch": 0.26, "grad_norm": 2.8503841944131776, "learning_rate": 1.7440177321644085e-05, "loss": 0.8921, "step": 1672 }, { "epoch": 0.26, "grad_norm": 3.117540564950011, "learning_rate": 1.7436863978664264e-05, "loss": 0.8934, "step": 1673 }, { "epoch": 0.26, "grad_norm": 3.0231089470141677, "learning_rate": 1.7433548807911846e-05, "loss": 1.0579, "step": 1674 }, { "epoch": 0.26, "grad_norm": 2.91070938996234, "learning_rate": 1.7430231810201616e-05, "loss": 0.9335, "step": 1675 }, { "epoch": 0.26, "grad_norm": 3.499268591374772, "learning_rate": 1.7426912986348797e-05, "loss": 0.9894, "step": 1676 }, { "epoch": 0.26, "grad_norm": 3.024082227265866, "learning_rate": 1.742359233716906e-05, "loss": 0.9656, "step": 1677 }, { "epoch": 0.26, "grad_norm": 2.9461564317791638, "learning_rate": 1.742026986347853e-05, "loss": 0.9275, "step": 1678 }, { "epoch": 0.26, "grad_norm": 8.29468643253041, "learning_rate": 1.7416945566093775e-05, "loss": 1.1882, "step": 1679 }, { "epoch": 0.26, "grad_norm": 3.3109274899198193, "learning_rate": 1.7413619445831815e-05, "loss": 0.9028, "step": 1680 }, { "epoch": 0.26, "grad_norm": 2.883852212607492, "learning_rate": 1.741029150351012e-05, "loss": 0.9543, "step": 1681 }, { "epoch": 0.26, "grad_norm": 3.1513321934934866, "learning_rate": 1.7406961739946605e-05, "loss": 1.012, "step": 1682 }, { "epoch": 0.26, "grad_norm": 3.0608795599630514, "learning_rate": 1.7403630155959626e-05, "loss": 0.9721, "step": 1683 }, { "epoch": 0.26, "grad_norm": 2.9213729732013003, "learning_rate": 1.7400296752368e-05, "loss": 1.0405, "step": 1684 }, { "epoch": 0.26, "grad_norm": 2.8757556386881253, "learning_rate": 1.7396961529990978e-05, "loss": 0.9275, "step": 1685 }, { "epoch": 0.26, "grad_norm": 3.0883656707818514, "learning_rate": 1.739362448964827e-05, "loss": 1.0599, "step": 1686 }, { "epoch": 0.26, "grad_norm": 2.8710024780202064, "learning_rate": 1.7390285632160025e-05, "loss": 0.94, "step": 1687 }, { "epoch": 0.26, "grad_norm": 2.9563153036212664, "learning_rate": 1.738694495834684e-05, "loss": 0.9108, "step": 1688 }, { "epoch": 0.26, "grad_norm": 3.0319132328021308, "learning_rate": 1.738360246902976e-05, "loss": 0.9292, "step": 1689 }, { "epoch": 0.26, "grad_norm": 2.9543356755124495, "learning_rate": 1.738025816503027e-05, "loss": 0.9085, "step": 1690 }, { "epoch": 0.26, "grad_norm": 3.127768570393247, "learning_rate": 1.7376912047170312e-05, "loss": 0.9394, "step": 1691 }, { "epoch": 0.26, "grad_norm": 3.149126382527672, "learning_rate": 1.7373564116272268e-05, "loss": 1.0142, "step": 1692 }, { "epoch": 0.26, "grad_norm": 3.16266300650271, "learning_rate": 1.7370214373158962e-05, "loss": 1.0159, "step": 1693 }, { "epoch": 0.26, "grad_norm": 2.7613314349272127, "learning_rate": 1.7366862818653668e-05, "loss": 0.8128, "step": 1694 }, { "epoch": 0.26, "grad_norm": 3.3646451143842917, "learning_rate": 1.7363509453580104e-05, "loss": 0.9478, "step": 1695 }, { "epoch": 0.26, "grad_norm": 2.7744638924974123, "learning_rate": 1.7360154278762437e-05, "loss": 0.9192, "step": 1696 }, { "epoch": 0.26, "grad_norm": 2.9470719493608946, "learning_rate": 1.7356797295025267e-05, "loss": 0.9279, "step": 1697 }, { "epoch": 0.26, "grad_norm": 3.0824620731472856, "learning_rate": 1.7353438503193657e-05, "loss": 0.9528, "step": 1698 }, { "epoch": 0.26, "grad_norm": 3.0844674185118532, "learning_rate": 1.7350077904093094e-05, "loss": 0.9883, "step": 1699 }, { "epoch": 0.26, "grad_norm": 3.093740006730346, "learning_rate": 1.734671549854952e-05, "loss": 0.8871, "step": 1700 }, { "epoch": 0.26, "grad_norm": 2.9962269846127003, "learning_rate": 1.7343351287389328e-05, "loss": 1.0069, "step": 1701 }, { "epoch": 0.26, "grad_norm": 3.0301979248106408, "learning_rate": 1.733998527143934e-05, "loss": 0.9567, "step": 1702 }, { "epoch": 0.26, "grad_norm": 2.900471623801645, "learning_rate": 1.733661745152683e-05, "loss": 0.8398, "step": 1703 }, { "epoch": 0.26, "grad_norm": 3.1403426337374203, "learning_rate": 1.733324782847951e-05, "loss": 1.0183, "step": 1704 }, { "epoch": 0.26, "grad_norm": 3.4374172819757125, "learning_rate": 1.7329876403125547e-05, "loss": 0.8892, "step": 1705 }, { "epoch": 0.26, "grad_norm": 3.030795047947954, "learning_rate": 1.7326503176293536e-05, "loss": 0.9275, "step": 1706 }, { "epoch": 0.26, "grad_norm": 2.8903678182970776, "learning_rate": 1.7323128148812525e-05, "loss": 0.8777, "step": 1707 }, { "epoch": 0.26, "grad_norm": 3.287031736581128, "learning_rate": 1.7319751321511998e-05, "loss": 0.9923, "step": 1708 }, { "epoch": 0.26, "grad_norm": 3.0188191593876836, "learning_rate": 1.7316372695221888e-05, "loss": 0.8788, "step": 1709 }, { "epoch": 0.26, "grad_norm": 3.019615610447504, "learning_rate": 1.7312992270772564e-05, "loss": 0.9385, "step": 1710 }, { "epoch": 0.26, "grad_norm": 2.8104085965673415, "learning_rate": 1.730961004899484e-05, "loss": 0.906, "step": 1711 }, { "epoch": 0.26, "grad_norm": 2.907845279180565, "learning_rate": 1.7306226030719972e-05, "loss": 0.9527, "step": 1712 }, { "epoch": 0.26, "grad_norm": 3.4993671502111137, "learning_rate": 1.7302840216779657e-05, "loss": 1.0291, "step": 1713 }, { "epoch": 0.26, "grad_norm": 4.106702965434131, "learning_rate": 1.7299452608006034e-05, "loss": 0.93, "step": 1714 }, { "epoch": 0.26, "grad_norm": 3.1101385424088748, "learning_rate": 1.7296063205231676e-05, "loss": 0.972, "step": 1715 }, { "epoch": 0.26, "grad_norm": 3.1293898506147744, "learning_rate": 1.729267200928961e-05, "loss": 1.104, "step": 1716 }, { "epoch": 0.26, "grad_norm": 2.9730924394570124, "learning_rate": 1.7289279021013298e-05, "loss": 1.0663, "step": 1717 }, { "epoch": 0.26, "grad_norm": 2.931559610901426, "learning_rate": 1.7285884241236636e-05, "loss": 0.8909, "step": 1718 }, { "epoch": 0.26, "grad_norm": 2.945747264549108, "learning_rate": 1.7282487670793967e-05, "loss": 0.9308, "step": 1719 }, { "epoch": 0.26, "grad_norm": 3.372598482443039, "learning_rate": 1.7279089310520075e-05, "loss": 0.9671, "step": 1720 }, { "epoch": 0.26, "grad_norm": 3.252864971605568, "learning_rate": 1.727568916125018e-05, "loss": 0.9744, "step": 1721 }, { "epoch": 0.26, "grad_norm": 3.0895113567236314, "learning_rate": 1.727228722381994e-05, "loss": 0.9229, "step": 1722 }, { "epoch": 0.26, "grad_norm": 2.882664161383922, "learning_rate": 1.7268883499065467e-05, "loss": 0.9749, "step": 1723 }, { "epoch": 0.26, "grad_norm": 3.1085081031820945, "learning_rate": 1.7265477987823287e-05, "loss": 1.0048, "step": 1724 }, { "epoch": 0.26, "grad_norm": 3.0004823214772047, "learning_rate": 1.7262070690930386e-05, "loss": 0.8328, "step": 1725 }, { "epoch": 0.26, "grad_norm": 3.038832818626629, "learning_rate": 1.7258661609224186e-05, "loss": 0.9228, "step": 1726 }, { "epoch": 0.26, "grad_norm": 3.192020710399445, "learning_rate": 1.7255250743542533e-05, "loss": 1.0519, "step": 1727 }, { "epoch": 0.26, "grad_norm": 2.6668903579950114, "learning_rate": 1.7251838094723732e-05, "loss": 0.8243, "step": 1728 }, { "epoch": 0.26, "grad_norm": 3.0670559820655092, "learning_rate": 1.7248423663606514e-05, "loss": 0.8683, "step": 1729 }, { "epoch": 0.26, "grad_norm": 2.8676037134225485, "learning_rate": 1.7245007451030046e-05, "loss": 0.9981, "step": 1730 }, { "epoch": 0.26, "grad_norm": 8.79497576470677, "learning_rate": 1.724158945783394e-05, "loss": 1.1708, "step": 1731 }, { "epoch": 0.27, "grad_norm": 2.99738917376387, "learning_rate": 1.723816968485825e-05, "loss": 1.0504, "step": 1732 }, { "epoch": 0.27, "grad_norm": 3.2178324841637616, "learning_rate": 1.7234748132943445e-05, "loss": 1.0838, "step": 1733 }, { "epoch": 0.27, "grad_norm": 3.4115054171733132, "learning_rate": 1.723132480293046e-05, "loss": 0.9939, "step": 1734 }, { "epoch": 0.27, "grad_norm": 2.7712272440729984, "learning_rate": 1.7227899695660647e-05, "loss": 0.8403, "step": 1735 }, { "epoch": 0.27, "grad_norm": 3.402421858255346, "learning_rate": 1.7224472811975803e-05, "loss": 0.9411, "step": 1736 }, { "epoch": 0.27, "grad_norm": 3.0223444412321774, "learning_rate": 1.722104415271816e-05, "loss": 0.9427, "step": 1737 }, { "epoch": 0.27, "grad_norm": 3.017597258777127, "learning_rate": 1.7217613718730385e-05, "loss": 0.9807, "step": 1738 }, { "epoch": 0.27, "grad_norm": 3.1635316587108906, "learning_rate": 1.7214181510855582e-05, "loss": 0.8457, "step": 1739 }, { "epoch": 0.27, "grad_norm": 3.7802900453298824, "learning_rate": 1.7210747529937296e-05, "loss": 0.9754, "step": 1740 }, { "epoch": 0.27, "grad_norm": 2.7253571537480985, "learning_rate": 1.72073117768195e-05, "loss": 0.8775, "step": 1741 }, { "epoch": 0.27, "grad_norm": 2.86569813967416, "learning_rate": 1.7203874252346607e-05, "loss": 0.9749, "step": 1742 }, { "epoch": 0.27, "grad_norm": 3.0864039605414426, "learning_rate": 1.720043495736346e-05, "loss": 0.8447, "step": 1743 }, { "epoch": 0.27, "grad_norm": 2.790503841646772, "learning_rate": 1.7196993892715344e-05, "loss": 0.9175, "step": 1744 }, { "epoch": 0.27, "grad_norm": 2.9046962258179656, "learning_rate": 1.719355105924798e-05, "loss": 0.9491, "step": 1745 }, { "epoch": 0.27, "grad_norm": 3.188869353506275, "learning_rate": 1.7190106457807515e-05, "loss": 0.8645, "step": 1746 }, { "epoch": 0.27, "grad_norm": 2.886800885546785, "learning_rate": 1.7186660089240535e-05, "loss": 0.8278, "step": 1747 }, { "epoch": 0.27, "grad_norm": 3.055015385983957, "learning_rate": 1.7183211954394063e-05, "loss": 0.9109, "step": 1748 }, { "epoch": 0.27, "grad_norm": 3.6597265454520023, "learning_rate": 1.7179762054115553e-05, "loss": 0.9243, "step": 1749 }, { "epoch": 0.27, "grad_norm": 3.2097148429469717, "learning_rate": 1.7176310389252897e-05, "loss": 0.9359, "step": 1750 }, { "epoch": 0.27, "grad_norm": 3.1357290092403733, "learning_rate": 1.717285696065441e-05, "loss": 0.8657, "step": 1751 }, { "epoch": 0.27, "grad_norm": 3.542033237018684, "learning_rate": 1.7169401769168855e-05, "loss": 0.9101, "step": 1752 }, { "epoch": 0.27, "grad_norm": 3.104045813974907, "learning_rate": 1.7165944815645418e-05, "loss": 0.9854, "step": 1753 }, { "epoch": 0.27, "grad_norm": 3.0470300166305817, "learning_rate": 1.7162486100933725e-05, "loss": 0.927, "step": 1754 }, { "epoch": 0.27, "grad_norm": 3.036460270383953, "learning_rate": 1.7159025625883823e-05, "loss": 0.8991, "step": 1755 }, { "epoch": 0.27, "grad_norm": 20.187501332975838, "learning_rate": 1.7155563391346207e-05, "loss": 1.2687, "step": 1756 }, { "epoch": 0.27, "grad_norm": 3.127249750494202, "learning_rate": 1.7152099398171796e-05, "loss": 1.001, "step": 1757 }, { "epoch": 0.27, "grad_norm": 3.086661240260967, "learning_rate": 1.7148633647211936e-05, "loss": 0.9008, "step": 1758 }, { "epoch": 0.27, "grad_norm": 3.6321929427561344, "learning_rate": 1.714516613931842e-05, "loss": 1.0084, "step": 1759 }, { "epoch": 0.27, "grad_norm": 3.03961360149895, "learning_rate": 1.7141696875343463e-05, "loss": 0.933, "step": 1760 }, { "epoch": 0.27, "grad_norm": 3.351519334357102, "learning_rate": 1.7138225856139707e-05, "loss": 0.9046, "step": 1761 }, { "epoch": 0.27, "grad_norm": 3.097830329883883, "learning_rate": 1.7134753082560236e-05, "loss": 1.0162, "step": 1762 }, { "epoch": 0.27, "grad_norm": 3.101611726104539, "learning_rate": 1.7131278555458558e-05, "loss": 0.9609, "step": 1763 }, { "epoch": 0.27, "grad_norm": 2.9844918351327556, "learning_rate": 1.7127802275688615e-05, "loss": 0.9106, "step": 1764 }, { "epoch": 0.27, "grad_norm": 3.2691109853035045, "learning_rate": 1.7124324244104782e-05, "loss": 0.8839, "step": 1765 }, { "epoch": 0.27, "grad_norm": 3.120653729928492, "learning_rate": 1.7120844461561857e-05, "loss": 0.8986, "step": 1766 }, { "epoch": 0.27, "grad_norm": 3.085688777871114, "learning_rate": 1.7117362928915073e-05, "loss": 0.9608, "step": 1767 }, { "epoch": 0.27, "grad_norm": 2.988794942793776, "learning_rate": 1.7113879647020098e-05, "loss": 0.9892, "step": 1768 }, { "epoch": 0.27, "grad_norm": 4.142175207922706, "learning_rate": 1.711039461673302e-05, "loss": 0.846, "step": 1769 }, { "epoch": 0.27, "grad_norm": 3.1702848317984467, "learning_rate": 1.7106907838910365e-05, "loss": 0.9765, "step": 1770 }, { "epoch": 0.27, "grad_norm": 18.480872514729015, "learning_rate": 1.7103419314409084e-05, "loss": 1.1757, "step": 1771 }, { "epoch": 0.27, "grad_norm": 2.7328141943559916, "learning_rate": 1.709992904408656e-05, "loss": 0.8963, "step": 1772 }, { "epoch": 0.27, "grad_norm": 3.0810657826869616, "learning_rate": 1.70964370288006e-05, "loss": 1.0049, "step": 1773 }, { "epoch": 0.27, "grad_norm": 3.1060015118018565, "learning_rate": 1.7092943269409442e-05, "loss": 0.8715, "step": 1774 }, { "epoch": 0.27, "grad_norm": 3.1102710293969507, "learning_rate": 1.7089447766771762e-05, "loss": 0.994, "step": 1775 }, { "epoch": 0.27, "grad_norm": 3.2154911009841762, "learning_rate": 1.708595052174665e-05, "loss": 0.9058, "step": 1776 }, { "epoch": 0.27, "grad_norm": 8.557012466247219, "learning_rate": 1.7082451535193635e-05, "loss": 1.167, "step": 1777 }, { "epoch": 0.27, "grad_norm": 2.783641336634145, "learning_rate": 1.7078950807972667e-05, "loss": 0.8901, "step": 1778 }, { "epoch": 0.27, "grad_norm": 2.9934657945221828, "learning_rate": 1.7075448340944125e-05, "loss": 0.9141, "step": 1779 }, { "epoch": 0.27, "grad_norm": 3.150396420366335, "learning_rate": 1.7071944134968817e-05, "loss": 0.8861, "step": 1780 }, { "epoch": 0.27, "grad_norm": 3.3958232210218937, "learning_rate": 1.7068438190907987e-05, "loss": 0.9127, "step": 1781 }, { "epoch": 0.27, "grad_norm": 3.0008540440950373, "learning_rate": 1.7064930509623287e-05, "loss": 0.9078, "step": 1782 }, { "epoch": 0.27, "grad_norm": 2.890349461668927, "learning_rate": 1.706142109197681e-05, "loss": 0.9415, "step": 1783 }, { "epoch": 0.27, "grad_norm": 2.865060978081574, "learning_rate": 1.7057909938831077e-05, "loss": 1.0334, "step": 1784 }, { "epoch": 0.27, "grad_norm": 3.231093802050666, "learning_rate": 1.7054397051049028e-05, "loss": 0.9508, "step": 1785 }, { "epoch": 0.27, "grad_norm": 2.8203188281216938, "learning_rate": 1.705088242949403e-05, "loss": 0.9999, "step": 1786 }, { "epoch": 0.27, "grad_norm": 3.0897255312247514, "learning_rate": 1.704736607502988e-05, "loss": 1.0371, "step": 1787 }, { "epoch": 0.27, "grad_norm": 2.9172885478578827, "learning_rate": 1.70438479885208e-05, "loss": 0.9821, "step": 1788 }, { "epoch": 0.27, "grad_norm": 2.9720055648574664, "learning_rate": 1.7040328170831438e-05, "loss": 0.8796, "step": 1789 }, { "epoch": 0.27, "grad_norm": 3.1382099352612167, "learning_rate": 1.703680662282686e-05, "loss": 0.8977, "step": 1790 }, { "epoch": 0.27, "grad_norm": 2.9843929380153864, "learning_rate": 1.7033283345372577e-05, "loss": 0.8788, "step": 1791 }, { "epoch": 0.27, "grad_norm": 2.7407504477935687, "learning_rate": 1.7029758339334493e-05, "loss": 0.9148, "step": 1792 }, { "epoch": 0.27, "grad_norm": 3.1593277458544846, "learning_rate": 1.7026231605578977e-05, "loss": 0.9894, "step": 1793 }, { "epoch": 0.27, "grad_norm": 2.8811698457573636, "learning_rate": 1.7022703144972783e-05, "loss": 0.9389, "step": 1794 }, { "epoch": 0.27, "grad_norm": 8.019957969047658, "learning_rate": 1.7019172958383117e-05, "loss": 1.1372, "step": 1795 }, { "epoch": 0.27, "grad_norm": 3.1741523508416507, "learning_rate": 1.70156410466776e-05, "loss": 0.9265, "step": 1796 }, { "epoch": 0.28, "grad_norm": 3.031657184880145, "learning_rate": 1.7012107410724272e-05, "loss": 1.009, "step": 1797 }, { "epoch": 0.28, "grad_norm": 3.046990652580771, "learning_rate": 1.7008572051391605e-05, "loss": 1.0, "step": 1798 }, { "epoch": 0.28, "grad_norm": 3.0803455961577884, "learning_rate": 1.7005034969548494e-05, "loss": 0.9237, "step": 1799 }, { "epoch": 0.28, "grad_norm": 2.935693160122931, "learning_rate": 1.7001496166064247e-05, "loss": 1.0491, "step": 1800 }, { "epoch": 0.28, "grad_norm": 2.7763607875745375, "learning_rate": 1.6997955641808607e-05, "loss": 0.9119, "step": 1801 }, { "epoch": 0.28, "grad_norm": 3.1123010538326614, "learning_rate": 1.6994413397651736e-05, "loss": 1.0113, "step": 1802 }, { "epoch": 0.28, "grad_norm": 3.130483097020963, "learning_rate": 1.6990869434464217e-05, "loss": 0.9616, "step": 1803 }, { "epoch": 0.28, "grad_norm": 3.3471090454376884, "learning_rate": 1.698732375311706e-05, "loss": 0.939, "step": 1804 }, { "epoch": 0.28, "grad_norm": 2.9468683554247557, "learning_rate": 1.698377635448169e-05, "loss": 0.8409, "step": 1805 }, { "epoch": 0.28, "grad_norm": 3.293572112069917, "learning_rate": 1.6980227239429957e-05, "loss": 0.9963, "step": 1806 }, { "epoch": 0.28, "grad_norm": 2.8948531850277086, "learning_rate": 1.6976676408834137e-05, "loss": 0.9784, "step": 1807 }, { "epoch": 0.28, "grad_norm": 3.139711415530516, "learning_rate": 1.6973123863566927e-05, "loss": 0.9313, "step": 1808 }, { "epoch": 0.28, "grad_norm": 3.1516894042204884, "learning_rate": 1.6969569604501437e-05, "loss": 0.9585, "step": 1809 }, { "epoch": 0.28, "grad_norm": 2.973758728621806, "learning_rate": 1.6966013632511207e-05, "loss": 0.8765, "step": 1810 }, { "epoch": 0.28, "grad_norm": 3.0817783963642067, "learning_rate": 1.6962455948470197e-05, "loss": 0.9369, "step": 1811 }, { "epoch": 0.28, "grad_norm": 2.790610696099825, "learning_rate": 1.6958896553252783e-05, "loss": 0.893, "step": 1812 }, { "epoch": 0.28, "grad_norm": 2.816385602375839, "learning_rate": 1.6955335447733768e-05, "loss": 0.9152, "step": 1813 }, { "epoch": 0.28, "grad_norm": 2.8898860914694873, "learning_rate": 1.6951772632788366e-05, "loss": 1.0058, "step": 1814 }, { "epoch": 0.28, "grad_norm": 2.9058069000027467, "learning_rate": 1.6948208109292224e-05, "loss": 0.9882, "step": 1815 }, { "epoch": 0.28, "grad_norm": 3.0880966053571, "learning_rate": 1.6944641878121397e-05, "loss": 0.9876, "step": 1816 }, { "epoch": 0.28, "grad_norm": 3.077793953569075, "learning_rate": 1.6941073940152367e-05, "loss": 0.905, "step": 1817 }, { "epoch": 0.28, "grad_norm": 3.197647594173069, "learning_rate": 1.693750429626203e-05, "loss": 0.9701, "step": 1818 }, { "epoch": 0.28, "grad_norm": 2.87426005324195, "learning_rate": 1.693393294732771e-05, "loss": 0.8155, "step": 1819 }, { "epoch": 0.28, "grad_norm": 2.8098810857134957, "learning_rate": 1.6930359894227137e-05, "loss": 1.0062, "step": 1820 }, { "epoch": 0.28, "grad_norm": 3.236471667352018, "learning_rate": 1.6926785137838475e-05, "loss": 1.1021, "step": 1821 }, { "epoch": 0.28, "grad_norm": 2.7811463349506473, "learning_rate": 1.6923208679040292e-05, "loss": 0.9271, "step": 1822 }, { "epoch": 0.28, "grad_norm": 3.119477770391288, "learning_rate": 1.6919630518711588e-05, "loss": 1.0113, "step": 1823 }, { "epoch": 0.28, "grad_norm": 2.7893762696875952, "learning_rate": 1.691605065773177e-05, "loss": 1.0375, "step": 1824 }, { "epoch": 0.28, "grad_norm": 2.6966638468986184, "learning_rate": 1.6912469096980664e-05, "loss": 0.9959, "step": 1825 }, { "epoch": 0.28, "grad_norm": 3.271609100047215, "learning_rate": 1.6908885837338525e-05, "loss": 0.9358, "step": 1826 }, { "epoch": 0.28, "grad_norm": 2.6535814458895586, "learning_rate": 1.6905300879686012e-05, "loss": 0.9314, "step": 1827 }, { "epoch": 0.28, "grad_norm": 3.0781102996355814, "learning_rate": 1.6901714224904215e-05, "loss": 0.9368, "step": 1828 }, { "epoch": 0.28, "grad_norm": 2.8514555295039825, "learning_rate": 1.689812587387462e-05, "loss": 0.9585, "step": 1829 }, { "epoch": 0.28, "grad_norm": 3.1607038231576516, "learning_rate": 1.6894535827479152e-05, "loss": 1.0184, "step": 1830 }, { "epoch": 0.28, "grad_norm": 2.7494442796069913, "learning_rate": 1.6890944086600145e-05, "loss": 0.9984, "step": 1831 }, { "epoch": 0.28, "grad_norm": 3.2687465123361275, "learning_rate": 1.6887350652120346e-05, "loss": 0.9707, "step": 1832 }, { "epoch": 0.28, "grad_norm": 3.078713456184605, "learning_rate": 1.688375552492292e-05, "loss": 0.9926, "step": 1833 }, { "epoch": 0.28, "grad_norm": 2.7217043180958616, "learning_rate": 1.688015870589144e-05, "loss": 0.9223, "step": 1834 }, { "epoch": 0.28, "grad_norm": 2.806529041950445, "learning_rate": 1.6876560195909916e-05, "loss": 0.9107, "step": 1835 }, { "epoch": 0.28, "grad_norm": 15.422810929651483, "learning_rate": 1.687295999586276e-05, "loss": 1.2869, "step": 1836 }, { "epoch": 0.28, "grad_norm": 3.194812363041196, "learning_rate": 1.6869358106634794e-05, "loss": 1.0234, "step": 1837 }, { "epoch": 0.28, "grad_norm": 3.0263600740187413, "learning_rate": 1.686575452911126e-05, "loss": 0.9931, "step": 1838 }, { "epoch": 0.28, "grad_norm": 3.217364912389536, "learning_rate": 1.6862149264177826e-05, "loss": 0.9718, "step": 1839 }, { "epoch": 0.28, "grad_norm": 2.864268872192165, "learning_rate": 1.6858542312720555e-05, "loss": 0.971, "step": 1840 }, { "epoch": 0.28, "grad_norm": 2.9868935967262358, "learning_rate": 1.685493367562594e-05, "loss": 0.9497, "step": 1841 }, { "epoch": 0.28, "grad_norm": 3.02682450886872, "learning_rate": 1.6851323353780883e-05, "loss": 1.0055, "step": 1842 }, { "epoch": 0.28, "grad_norm": 2.985454978585505, "learning_rate": 1.6847711348072694e-05, "loss": 0.9894, "step": 1843 }, { "epoch": 0.28, "grad_norm": 3.062161794366507, "learning_rate": 1.684409765938911e-05, "loss": 0.9619, "step": 1844 }, { "epoch": 0.28, "grad_norm": 2.920527000454507, "learning_rate": 1.684048228861827e-05, "loss": 0.9533, "step": 1845 }, { "epoch": 0.28, "grad_norm": 2.8508734491132364, "learning_rate": 1.6836865236648736e-05, "loss": 0.8948, "step": 1846 }, { "epoch": 0.28, "grad_norm": 3.109326001566116, "learning_rate": 1.683324650436947e-05, "loss": 0.866, "step": 1847 }, { "epoch": 0.28, "grad_norm": 2.9223301296017796, "learning_rate": 1.682962609266986e-05, "loss": 0.9989, "step": 1848 }, { "epoch": 0.28, "grad_norm": 2.9451164618486563, "learning_rate": 1.68260040024397e-05, "loss": 0.9472, "step": 1849 }, { "epoch": 0.28, "grad_norm": 3.06203765888581, "learning_rate": 1.68223802345692e-05, "loss": 0.9352, "step": 1850 }, { "epoch": 0.28, "grad_norm": 2.947449793617199, "learning_rate": 1.6818754789948974e-05, "loss": 0.9157, "step": 1851 }, { "epoch": 0.28, "grad_norm": 3.2220571907558564, "learning_rate": 1.6815127669470066e-05, "loss": 1.0802, "step": 1852 }, { "epoch": 0.28, "grad_norm": 2.9745392570414517, "learning_rate": 1.6811498874023914e-05, "loss": 0.9811, "step": 1853 }, { "epoch": 0.28, "grad_norm": 2.9466446307315883, "learning_rate": 1.680786840450237e-05, "loss": 0.8016, "step": 1854 }, { "epoch": 0.28, "grad_norm": 3.221723283817632, "learning_rate": 1.6804236261797707e-05, "loss": 0.9568, "step": 1855 }, { "epoch": 0.28, "grad_norm": 2.8783807082840136, "learning_rate": 1.6800602446802604e-05, "loss": 1.0294, "step": 1856 }, { "epoch": 0.28, "grad_norm": 3.3234673136431776, "learning_rate": 1.679696696041015e-05, "loss": 1.0243, "step": 1857 }, { "epoch": 0.28, "grad_norm": 3.1457564055469462, "learning_rate": 1.6793329803513845e-05, "loss": 1.0493, "step": 1858 }, { "epoch": 0.28, "grad_norm": 2.833612508072633, "learning_rate": 1.67896909770076e-05, "loss": 0.9366, "step": 1859 }, { "epoch": 0.28, "grad_norm": 3.0762053615072427, "learning_rate": 1.678605048178574e-05, "loss": 0.9819, "step": 1860 }, { "epoch": 0.28, "grad_norm": 3.2781884938178423, "learning_rate": 1.678240831874299e-05, "loss": 0.9688, "step": 1861 }, { "epoch": 0.29, "grad_norm": 3.3692602382750247, "learning_rate": 1.677876448877449e-05, "loss": 0.8875, "step": 1862 }, { "epoch": 0.29, "grad_norm": 3.013326890826228, "learning_rate": 1.6775118992775805e-05, "loss": 0.8707, "step": 1863 }, { "epoch": 0.29, "grad_norm": 3.459040885528723, "learning_rate": 1.6771471831642885e-05, "loss": 1.0118, "step": 1864 }, { "epoch": 0.29, "grad_norm": 2.901135398632171, "learning_rate": 1.67678230062721e-05, "loss": 0.9964, "step": 1865 }, { "epoch": 0.29, "grad_norm": 3.146941247952338, "learning_rate": 1.6764172517560232e-05, "loss": 1.0776, "step": 1866 }, { "epoch": 0.29, "grad_norm": 3.239123497963529, "learning_rate": 1.6760520366404465e-05, "loss": 0.935, "step": 1867 }, { "epoch": 0.29, "grad_norm": 3.1929933652649822, "learning_rate": 1.67568665537024e-05, "loss": 0.8425, "step": 1868 }, { "epoch": 0.29, "grad_norm": 3.033169365361725, "learning_rate": 1.675321108035204e-05, "loss": 0.9379, "step": 1869 }, { "epoch": 0.29, "grad_norm": 3.0333266108265007, "learning_rate": 1.6749553947251796e-05, "loss": 0.8782, "step": 1870 }, { "epoch": 0.29, "grad_norm": 2.876592953022209, "learning_rate": 1.674589515530049e-05, "loss": 0.8542, "step": 1871 }, { "epoch": 0.29, "grad_norm": 3.302964058161183, "learning_rate": 1.6742234705397353e-05, "loss": 0.9858, "step": 1872 }, { "epoch": 0.29, "grad_norm": 3.124949533715328, "learning_rate": 1.6738572598442017e-05, "loss": 0.9404, "step": 1873 }, { "epoch": 0.29, "grad_norm": 3.0780370387166376, "learning_rate": 1.6734908835334528e-05, "loss": 0.9963, "step": 1874 }, { "epoch": 0.29, "grad_norm": 2.8498009806918034, "learning_rate": 1.673124341697533e-05, "loss": 0.9845, "step": 1875 }, { "epoch": 0.29, "grad_norm": 2.816079342172047, "learning_rate": 1.672757634426529e-05, "loss": 0.9985, "step": 1876 }, { "epoch": 0.29, "grad_norm": 2.7436541873730276, "learning_rate": 1.6723907618105664e-05, "loss": 0.9205, "step": 1877 }, { "epoch": 0.29, "grad_norm": 2.95311268150872, "learning_rate": 1.6720237239398125e-05, "loss": 0.8629, "step": 1878 }, { "epoch": 0.29, "grad_norm": 19.916438316118874, "learning_rate": 1.671656520904475e-05, "loss": 1.22, "step": 1879 }, { "epoch": 0.29, "grad_norm": 3.0861564067907, "learning_rate": 1.671289152794802e-05, "loss": 0.9189, "step": 1880 }, { "epoch": 0.29, "grad_norm": 3.089133404397287, "learning_rate": 1.670921619701082e-05, "loss": 0.9411, "step": 1881 }, { "epoch": 0.29, "grad_norm": 3.221747175011406, "learning_rate": 1.6705539217136447e-05, "loss": 0.9864, "step": 1882 }, { "epoch": 0.29, "grad_norm": 2.996888161089025, "learning_rate": 1.6701860589228597e-05, "loss": 1.0954, "step": 1883 }, { "epoch": 0.29, "grad_norm": 8.534999187383088, "learning_rate": 1.6698180314191375e-05, "loss": 1.1416, "step": 1884 }, { "epoch": 0.29, "grad_norm": 3.250891562903199, "learning_rate": 1.6694498392929293e-05, "loss": 1.0242, "step": 1885 }, { "epoch": 0.29, "grad_norm": 3.1721803755932974, "learning_rate": 1.669081482634726e-05, "loss": 1.0573, "step": 1886 }, { "epoch": 0.29, "grad_norm": 3.0931656989393654, "learning_rate": 1.668712961535059e-05, "loss": 0.9245, "step": 1887 }, { "epoch": 0.29, "grad_norm": 2.7676923458266494, "learning_rate": 1.668344276084501e-05, "loss": 0.9846, "step": 1888 }, { "epoch": 0.29, "grad_norm": 3.2333026082841565, "learning_rate": 1.6679754263736644e-05, "loss": 0.9653, "step": 1889 }, { "epoch": 0.29, "grad_norm": 2.9318513606737286, "learning_rate": 1.6676064124932016e-05, "loss": 0.942, "step": 1890 }, { "epoch": 0.29, "grad_norm": 2.804796108438422, "learning_rate": 1.6672372345338067e-05, "loss": 1.0292, "step": 1891 }, { "epoch": 0.29, "grad_norm": 2.7689755284678292, "learning_rate": 1.666867892586213e-05, "loss": 0.9791, "step": 1892 }, { "epoch": 0.29, "grad_norm": 2.730471857480553, "learning_rate": 1.6664983867411947e-05, "loss": 0.9998, "step": 1893 }, { "epoch": 0.29, "grad_norm": 2.9574138452567618, "learning_rate": 1.6661287170895647e-05, "loss": 1.0932, "step": 1894 }, { "epoch": 0.29, "grad_norm": 2.9343442651527223, "learning_rate": 1.665758883722179e-05, "loss": 0.9637, "step": 1895 }, { "epoch": 0.29, "grad_norm": 2.928698965252209, "learning_rate": 1.6653888867299312e-05, "loss": 0.9635, "step": 1896 }, { "epoch": 0.29, "grad_norm": 3.0528707428961575, "learning_rate": 1.6650187262037567e-05, "loss": 1.0008, "step": 1897 }, { "epoch": 0.29, "grad_norm": 3.0335541906489136, "learning_rate": 1.6646484022346305e-05, "loss": 0.8778, "step": 1898 }, { "epoch": 0.29, "grad_norm": 3.2308635269804653, "learning_rate": 1.6642779149135677e-05, "loss": 0.9607, "step": 1899 }, { "epoch": 0.29, "grad_norm": 3.0332734710472575, "learning_rate": 1.663907264331624e-05, "loss": 0.8383, "step": 1900 }, { "epoch": 0.29, "grad_norm": 2.840711811673916, "learning_rate": 1.6635364505798946e-05, "loss": 0.9219, "step": 1901 }, { "epoch": 0.29, "grad_norm": 16.051039006193275, "learning_rate": 1.663165473749515e-05, "loss": 1.1543, "step": 1902 }, { "epoch": 0.29, "grad_norm": 3.045344183831331, "learning_rate": 1.6627943339316616e-05, "loss": 0.9771, "step": 1903 }, { "epoch": 0.29, "grad_norm": 2.9859000516392347, "learning_rate": 1.662423031217549e-05, "loss": 0.9071, "step": 1904 }, { "epoch": 0.29, "grad_norm": 3.274059140274478, "learning_rate": 1.6620515656984343e-05, "loss": 0.8979, "step": 1905 }, { "epoch": 0.29, "grad_norm": 2.9111963302746187, "learning_rate": 1.6616799374656124e-05, "loss": 0.8854, "step": 1906 }, { "epoch": 0.29, "grad_norm": 3.6802138181802873, "learning_rate": 1.6613081466104196e-05, "loss": 0.9194, "step": 1907 }, { "epoch": 0.29, "grad_norm": 2.973806688301168, "learning_rate": 1.660936193224231e-05, "loss": 0.9397, "step": 1908 }, { "epoch": 0.29, "grad_norm": 2.9352989064745683, "learning_rate": 1.660564077398463e-05, "loss": 0.9442, "step": 1909 }, { "epoch": 0.29, "grad_norm": 2.9344758621160834, "learning_rate": 1.6601917992245712e-05, "loss": 0.9395, "step": 1910 }, { "epoch": 0.29, "grad_norm": 2.8050275279211845, "learning_rate": 1.6598193587940508e-05, "loss": 1.0315, "step": 1911 }, { "epoch": 0.29, "grad_norm": 3.226670654091148, "learning_rate": 1.659446756198437e-05, "loss": 0.9383, "step": 1912 }, { "epoch": 0.29, "grad_norm": 3.112674249923903, "learning_rate": 1.6590739915293056e-05, "loss": 1.0231, "step": 1913 }, { "epoch": 0.29, "grad_norm": 2.665704117888447, "learning_rate": 1.6587010648782717e-05, "loss": 0.9799, "step": 1914 }, { "epoch": 0.29, "grad_norm": 9.465238430343106, "learning_rate": 1.65832797633699e-05, "loss": 1.1079, "step": 1915 }, { "epoch": 0.29, "grad_norm": 2.911361683645425, "learning_rate": 1.657954725997155e-05, "loss": 0.9573, "step": 1916 }, { "epoch": 0.29, "grad_norm": 2.927834883238085, "learning_rate": 1.6575813139505016e-05, "loss": 0.9304, "step": 1917 }, { "epoch": 0.29, "grad_norm": 2.880364399662898, "learning_rate": 1.6572077402888037e-05, "loss": 0.8702, "step": 1918 }, { "epoch": 0.29, "grad_norm": 2.9971621634760734, "learning_rate": 1.6568340051038754e-05, "loss": 0.963, "step": 1919 }, { "epoch": 0.29, "grad_norm": 2.7330086419109856, "learning_rate": 1.6564601084875703e-05, "loss": 0.9331, "step": 1920 }, { "epoch": 0.29, "grad_norm": 3.2160871480825253, "learning_rate": 1.6560860505317813e-05, "loss": 1.0767, "step": 1921 }, { "epoch": 0.29, "grad_norm": 3.6222550750639892, "learning_rate": 1.655711831328442e-05, "loss": 0.8794, "step": 1922 }, { "epoch": 0.29, "grad_norm": 3.4903844136454523, "learning_rate": 1.6553374509695244e-05, "loss": 0.8383, "step": 1923 }, { "epoch": 0.29, "grad_norm": 2.8607341149397123, "learning_rate": 1.6549629095470413e-05, "loss": 0.9757, "step": 1924 }, { "epoch": 0.29, "grad_norm": 3.501136761886263, "learning_rate": 1.6545882071530443e-05, "loss": 1.1234, "step": 1925 }, { "epoch": 0.29, "grad_norm": 3.003942035104415, "learning_rate": 1.654213343879624e-05, "loss": 1.0772, "step": 1926 }, { "epoch": 0.29, "grad_norm": 6.455659166595698, "learning_rate": 1.6538383198189122e-05, "loss": 1.0843, "step": 1927 }, { "epoch": 0.3, "grad_norm": 2.885447760378492, "learning_rate": 1.653463135063079e-05, "loss": 0.9104, "step": 1928 }, { "epoch": 0.3, "grad_norm": 3.144702023071639, "learning_rate": 1.6530877897043343e-05, "loss": 1.0566, "step": 1929 }, { "epoch": 0.3, "grad_norm": 3.1471032152462115, "learning_rate": 1.6527122838349274e-05, "loss": 1.0278, "step": 1930 }, { "epoch": 0.3, "grad_norm": 2.890264832007854, "learning_rate": 1.652336617547147e-05, "loss": 0.9163, "step": 1931 }, { "epoch": 0.3, "grad_norm": 2.9317766549252466, "learning_rate": 1.6519607909333216e-05, "loss": 0.8429, "step": 1932 }, { "epoch": 0.3, "grad_norm": 3.4212716262094105, "learning_rate": 1.6515848040858186e-05, "loss": 0.9769, "step": 1933 }, { "epoch": 0.3, "grad_norm": 3.248556388386718, "learning_rate": 1.6512086570970455e-05, "loss": 0.8718, "step": 1934 }, { "epoch": 0.3, "grad_norm": 2.903299186840235, "learning_rate": 1.650832350059448e-05, "loss": 1.0638, "step": 1935 }, { "epoch": 0.3, "grad_norm": 2.8786177507895068, "learning_rate": 1.650455883065512e-05, "loss": 0.991, "step": 1936 }, { "epoch": 0.3, "grad_norm": 2.8504920475696833, "learning_rate": 1.650079256207763e-05, "loss": 0.9286, "step": 1937 }, { "epoch": 0.3, "grad_norm": 3.108477746343841, "learning_rate": 1.6497024695787646e-05, "loss": 0.9307, "step": 1938 }, { "epoch": 0.3, "grad_norm": 2.7398926753706903, "learning_rate": 1.649325523271121e-05, "loss": 0.9631, "step": 1939 }, { "epoch": 0.3, "grad_norm": 3.05048561325502, "learning_rate": 1.6489484173774747e-05, "loss": 0.934, "step": 1940 }, { "epoch": 0.3, "grad_norm": 3.1390413170814235, "learning_rate": 1.648571151990508e-05, "loss": 0.9479, "step": 1941 }, { "epoch": 0.3, "grad_norm": 6.797072943904557, "learning_rate": 1.648193727202942e-05, "loss": 1.1066, "step": 1942 }, { "epoch": 0.3, "grad_norm": 2.838565494232607, "learning_rate": 1.647816143107537e-05, "loss": 1.0361, "step": 1943 }, { "epoch": 0.3, "grad_norm": 3.0395818003900956, "learning_rate": 1.6474383997970928e-05, "loss": 0.9884, "step": 1944 }, { "epoch": 0.3, "grad_norm": 2.9470028853067123, "learning_rate": 1.6470604973644483e-05, "loss": 0.9513, "step": 1945 }, { "epoch": 0.3, "grad_norm": 2.7727680957102336, "learning_rate": 1.6466824359024803e-05, "loss": 0.966, "step": 1946 }, { "epoch": 0.3, "grad_norm": 6.713985789430561, "learning_rate": 1.646304215504107e-05, "loss": 1.1203, "step": 1947 }, { "epoch": 0.3, "grad_norm": 3.0833960862474434, "learning_rate": 1.645925836262284e-05, "loss": 0.9877, "step": 1948 }, { "epoch": 0.3, "grad_norm": 2.7057696057868297, "learning_rate": 1.6455472982700055e-05, "loss": 0.8799, "step": 1949 }, { "epoch": 0.3, "grad_norm": 2.884971897595675, "learning_rate": 1.6451686016203065e-05, "loss": 0.9695, "step": 1950 }, { "epoch": 0.3, "grad_norm": 3.159371915076138, "learning_rate": 1.6447897464062593e-05, "loss": 0.9409, "step": 1951 }, { "epoch": 0.3, "grad_norm": 3.0243298134150836, "learning_rate": 1.644410732720977e-05, "loss": 0.904, "step": 1952 }, { "epoch": 0.3, "grad_norm": 2.8079014057457963, "learning_rate": 1.644031560657609e-05, "loss": 0.9069, "step": 1953 }, { "epoch": 0.3, "grad_norm": 3.102887289670685, "learning_rate": 1.6436522303093462e-05, "loss": 0.8864, "step": 1954 }, { "epoch": 0.3, "grad_norm": 2.8903116065538312, "learning_rate": 1.6432727417694172e-05, "loss": 0.9538, "step": 1955 }, { "epoch": 0.3, "grad_norm": 3.2832123605271866, "learning_rate": 1.6428930951310895e-05, "loss": 0.9247, "step": 1956 }, { "epoch": 0.3, "grad_norm": 3.2273969880571656, "learning_rate": 1.6425132904876696e-05, "loss": 0.9876, "step": 1957 }, { "epoch": 0.3, "grad_norm": 3.013714545246212, "learning_rate": 1.642133327932503e-05, "loss": 0.9537, "step": 1958 }, { "epoch": 0.3, "grad_norm": 2.7463430437492122, "learning_rate": 1.6417532075589733e-05, "loss": 0.9166, "step": 1959 }, { "epoch": 0.3, "grad_norm": 2.844332090271417, "learning_rate": 1.6413729294605043e-05, "loss": 0.9816, "step": 1960 }, { "epoch": 0.3, "grad_norm": 8.087235521306631, "learning_rate": 1.6409924937305567e-05, "loss": 1.0865, "step": 1961 }, { "epoch": 0.3, "grad_norm": 2.752478659387522, "learning_rate": 1.640611900462632e-05, "loss": 0.9745, "step": 1962 }, { "epoch": 0.3, "grad_norm": 2.8605402043191, "learning_rate": 1.6402311497502685e-05, "loss": 0.9028, "step": 1963 }, { "epoch": 0.3, "grad_norm": 2.8371013619172736, "learning_rate": 1.6398502416870444e-05, "loss": 0.943, "step": 1964 }, { "epoch": 0.3, "grad_norm": 3.1775931381327656, "learning_rate": 1.6394691763665762e-05, "loss": 0.8965, "step": 1965 }, { "epoch": 0.3, "grad_norm": 3.1302370672192414, "learning_rate": 1.6390879538825188e-05, "loss": 1.0056, "step": 1966 }, { "epoch": 0.3, "grad_norm": 3.0799939612715064, "learning_rate": 1.6387065743285667e-05, "loss": 0.9704, "step": 1967 }, { "epoch": 0.3, "grad_norm": 2.6241877817898187, "learning_rate": 1.6383250377984515e-05, "loss": 0.9161, "step": 1968 }, { "epoch": 0.3, "grad_norm": 3.0591703288395715, "learning_rate": 1.637943344385944e-05, "loss": 0.9756, "step": 1969 }, { "epoch": 0.3, "grad_norm": 2.780749719595347, "learning_rate": 1.6375614941848553e-05, "loss": 0.9215, "step": 1970 }, { "epoch": 0.3, "grad_norm": 2.78679062321962, "learning_rate": 1.637179487289032e-05, "loss": 0.9447, "step": 1971 }, { "epoch": 0.3, "grad_norm": 2.916558730513451, "learning_rate": 1.6367973237923606e-05, "loss": 1.017, "step": 1972 }, { "epoch": 0.3, "grad_norm": 2.8714051172595014, "learning_rate": 1.636415003788767e-05, "loss": 0.9574, "step": 1973 }, { "epoch": 0.3, "grad_norm": 2.424562176046629, "learning_rate": 1.6360325273722148e-05, "loss": 0.7982, "step": 1974 }, { "epoch": 0.3, "grad_norm": 2.7338308340261883, "learning_rate": 1.6356498946367052e-05, "loss": 0.9081, "step": 1975 }, { "epoch": 0.3, "grad_norm": 3.27037857350359, "learning_rate": 1.635267105676279e-05, "loss": 1.0226, "step": 1976 }, { "epoch": 0.3, "grad_norm": 3.1300143332121175, "learning_rate": 1.634884160585015e-05, "loss": 0.9527, "step": 1977 }, { "epoch": 0.3, "grad_norm": 3.106799123943485, "learning_rate": 1.6345010594570303e-05, "loss": 0.9228, "step": 1978 }, { "epoch": 0.3, "grad_norm": 2.9397095069102734, "learning_rate": 1.6341178023864803e-05, "loss": 0.9816, "step": 1979 }, { "epoch": 0.3, "grad_norm": 3.0107007075493217, "learning_rate": 1.6337343894675594e-05, "loss": 0.8376, "step": 1980 }, { "epoch": 0.3, "grad_norm": 3.238057799122706, "learning_rate": 1.6333508207944987e-05, "loss": 0.9929, "step": 1981 }, { "epoch": 0.3, "grad_norm": 2.767232716037717, "learning_rate": 1.6329670964615698e-05, "loss": 0.9193, "step": 1982 }, { "epoch": 0.3, "grad_norm": 3.068081714153525, "learning_rate": 1.6325832165630804e-05, "loss": 0.9252, "step": 1983 }, { "epoch": 0.3, "grad_norm": 10.403815270134738, "learning_rate": 1.6321991811933778e-05, "loss": 1.1198, "step": 1984 }, { "epoch": 0.3, "grad_norm": 6.99459133747253, "learning_rate": 1.6318149904468476e-05, "loss": 1.2209, "step": 1985 }, { "epoch": 0.3, "grad_norm": 3.3344886744220963, "learning_rate": 1.6314306444179124e-05, "loss": 0.9521, "step": 1986 }, { "epoch": 0.3, "grad_norm": 8.507952799865198, "learning_rate": 1.631046143201034e-05, "loss": 1.1653, "step": 1987 }, { "epoch": 0.3, "grad_norm": 3.0183257520527236, "learning_rate": 1.6306614868907118e-05, "loss": 0.9675, "step": 1988 }, { "epoch": 0.3, "grad_norm": 3.2917705295673354, "learning_rate": 1.6302766755814837e-05, "loss": 0.9624, "step": 1989 }, { "epoch": 0.3, "grad_norm": 2.6996186995769325, "learning_rate": 1.6298917093679256e-05, "loss": 0.9873, "step": 1990 }, { "epoch": 0.3, "grad_norm": 2.92846853659815, "learning_rate": 1.6295065883446514e-05, "loss": 1.0206, "step": 1991 }, { "epoch": 0.3, "grad_norm": 2.6153606760314583, "learning_rate": 1.6291213126063127e-05, "loss": 0.8827, "step": 1992 }, { "epoch": 0.31, "grad_norm": 3.0482692285445476, "learning_rate": 1.6287358822476003e-05, "loss": 0.9584, "step": 1993 }, { "epoch": 0.31, "grad_norm": 2.945888562890866, "learning_rate": 1.628350297363241e-05, "loss": 1.0334, "step": 1994 }, { "epoch": 0.31, "grad_norm": 2.9809895023289705, "learning_rate": 1.6279645580480016e-05, "loss": 0.8799, "step": 1995 }, { "epoch": 0.31, "grad_norm": 3.040358876025292, "learning_rate": 1.6275786643966857e-05, "loss": 0.9606, "step": 1996 }, { "epoch": 0.31, "grad_norm": 3.120304648204235, "learning_rate": 1.627192616504135e-05, "loss": 0.9562, "step": 1997 }, { "epoch": 0.31, "grad_norm": 2.8891192132781764, "learning_rate": 1.6268064144652298e-05, "loss": 0.9539, "step": 1998 }, { "epoch": 0.31, "grad_norm": 12.618369413745189, "learning_rate": 1.6264200583748872e-05, "loss": 1.1742, "step": 1999 }, { "epoch": 0.31, "grad_norm": 2.562754164287327, "learning_rate": 1.6260335483280628e-05, "loss": 0.9873, "step": 2000 }, { "epoch": 0.31, "grad_norm": 2.7923306530773155, "learning_rate": 1.6256468844197503e-05, "loss": 0.906, "step": 2001 }, { "epoch": 0.31, "grad_norm": 2.7896490661006172, "learning_rate": 1.6252600667449803e-05, "loss": 1.0057, "step": 2002 }, { "epoch": 0.31, "grad_norm": 3.0281024080777756, "learning_rate": 1.624873095398822e-05, "loss": 0.9022, "step": 2003 }, { "epoch": 0.31, "grad_norm": 2.881684994302156, "learning_rate": 1.6244859704763822e-05, "loss": 0.9479, "step": 2004 }, { "epoch": 0.31, "grad_norm": 2.9444445700411137, "learning_rate": 1.6240986920728047e-05, "loss": 0.8036, "step": 2005 }, { "epoch": 0.31, "grad_norm": 2.7270622289384363, "learning_rate": 1.6237112602832725e-05, "loss": 0.8149, "step": 2006 }, { "epoch": 0.31, "grad_norm": 2.932690790849735, "learning_rate": 1.6233236752030055e-05, "loss": 0.894, "step": 2007 }, { "epoch": 0.31, "grad_norm": 2.9384491189874713, "learning_rate": 1.6229359369272604e-05, "loss": 0.9274, "step": 2008 }, { "epoch": 0.31, "grad_norm": 3.2749757856588406, "learning_rate": 1.622548045551333e-05, "loss": 1.0205, "step": 2009 }, { "epoch": 0.31, "grad_norm": 3.2986011656446235, "learning_rate": 1.6221600011705562e-05, "loss": 0.9656, "step": 2010 }, { "epoch": 0.31, "grad_norm": 2.899694894395896, "learning_rate": 1.6217718038803004e-05, "loss": 0.9747, "step": 2011 }, { "epoch": 0.31, "grad_norm": 2.805963080021518, "learning_rate": 1.621383453775973e-05, "loss": 0.9258, "step": 2012 }, { "epoch": 0.31, "grad_norm": 3.079907285065247, "learning_rate": 1.6209949509530206e-05, "loss": 1.0381, "step": 2013 }, { "epoch": 0.31, "grad_norm": 2.85919475322715, "learning_rate": 1.6206062955069252e-05, "loss": 0.9665, "step": 2014 }, { "epoch": 0.31, "grad_norm": 3.009738760174339, "learning_rate": 1.6202174875332082e-05, "loss": 0.9749, "step": 2015 }, { "epoch": 0.31, "grad_norm": 2.7899818371791323, "learning_rate": 1.619828527127427e-05, "loss": 0.8872, "step": 2016 }, { "epoch": 0.31, "grad_norm": 2.9113651910730733, "learning_rate": 1.619439414385178e-05, "loss": 0.93, "step": 2017 }, { "epoch": 0.31, "grad_norm": 3.1385972777593194, "learning_rate": 1.6190501494020938e-05, "loss": 0.878, "step": 2018 }, { "epoch": 0.31, "grad_norm": 2.902655972971181, "learning_rate": 1.6186607322738446e-05, "loss": 0.9324, "step": 2019 }, { "epoch": 0.31, "grad_norm": 36.255687160999884, "learning_rate": 1.6182711630961385e-05, "loss": 1.2675, "step": 2020 }, { "epoch": 0.31, "grad_norm": 2.8813953340320717, "learning_rate": 1.6178814419647207e-05, "loss": 0.9405, "step": 2021 }, { "epoch": 0.31, "grad_norm": 3.146214173693879, "learning_rate": 1.6174915689753733e-05, "loss": 0.9028, "step": 2022 }, { "epoch": 0.31, "grad_norm": 3.295327922343884, "learning_rate": 1.6171015442239167e-05, "loss": 0.8791, "step": 2023 }, { "epoch": 0.31, "grad_norm": 2.7436426736490667, "learning_rate": 1.6167113678062074e-05, "loss": 0.8547, "step": 2024 }, { "epoch": 0.31, "grad_norm": 2.983002667352024, "learning_rate": 1.6163210398181405e-05, "loss": 0.9734, "step": 2025 }, { "epoch": 0.31, "grad_norm": 3.21413114511768, "learning_rate": 1.6159305603556474e-05, "loss": 0.9882, "step": 2026 }, { "epoch": 0.31, "grad_norm": 2.8282094732335454, "learning_rate": 1.615539929514697e-05, "loss": 0.921, "step": 2027 }, { "epoch": 0.31, "grad_norm": 3.1604216542592587, "learning_rate": 1.615149147391295e-05, "loss": 0.9523, "step": 2028 }, { "epoch": 0.31, "grad_norm": 2.856666507616215, "learning_rate": 1.614758214081486e-05, "loss": 0.936, "step": 2029 }, { "epoch": 0.31, "grad_norm": 2.991532447251548, "learning_rate": 1.6143671296813487e-05, "loss": 0.9874, "step": 2030 }, { "epoch": 0.31, "grad_norm": 3.840642279858671, "learning_rate": 1.6139758942870015e-05, "loss": 0.9078, "step": 2031 }, { "epoch": 0.31, "grad_norm": 2.9496055825239855, "learning_rate": 1.6135845079945994e-05, "loss": 0.9009, "step": 2032 }, { "epoch": 0.31, "grad_norm": 3.4997572003360498, "learning_rate": 1.6131929709003338e-05, "loss": 0.8366, "step": 2033 }, { "epoch": 0.31, "grad_norm": 2.854286295482546, "learning_rate": 1.6128012831004334e-05, "loss": 0.9281, "step": 2034 }, { "epoch": 0.31, "grad_norm": 3.0047445073134647, "learning_rate": 1.6124094446911643e-05, "loss": 0.9288, "step": 2035 }, { "epoch": 0.31, "grad_norm": 3.0747711973099534, "learning_rate": 1.6120174557688296e-05, "loss": 0.9777, "step": 2036 }, { "epoch": 0.31, "grad_norm": 2.836496039165514, "learning_rate": 1.6116253164297688e-05, "loss": 1.0003, "step": 2037 }, { "epoch": 0.31, "grad_norm": 2.9495829885617866, "learning_rate": 1.611233026770359e-05, "loss": 0.958, "step": 2038 }, { "epoch": 0.31, "grad_norm": 4.040177556037205, "learning_rate": 1.6108405868870138e-05, "loss": 1.0066, "step": 2039 }, { "epoch": 0.31, "grad_norm": 2.8392312040901073, "learning_rate": 1.610447996876184e-05, "loss": 0.9305, "step": 2040 }, { "epoch": 0.31, "grad_norm": 2.858555557096998, "learning_rate": 1.6100552568343575e-05, "loss": 0.9561, "step": 2041 }, { "epoch": 0.31, "grad_norm": 2.925244748188534, "learning_rate": 1.609662366858058e-05, "loss": 0.9858, "step": 2042 }, { "epoch": 0.31, "grad_norm": 18.775907505689492, "learning_rate": 1.6092693270438477e-05, "loss": 1.1733, "step": 2043 }, { "epoch": 0.31, "grad_norm": 2.8831230007137116, "learning_rate": 1.6088761374883244e-05, "loss": 0.9558, "step": 2044 }, { "epoch": 0.31, "grad_norm": 3.0027686259492707, "learning_rate": 1.608482798288123e-05, "loss": 0.9855, "step": 2045 }, { "epoch": 0.31, "grad_norm": 3.3576253702197185, "learning_rate": 1.6080893095399154e-05, "loss": 0.996, "step": 2046 }, { "epoch": 0.31, "grad_norm": 2.9658837524613344, "learning_rate": 1.6076956713404096e-05, "loss": 0.9021, "step": 2047 }, { "epoch": 0.31, "grad_norm": 3.1626601985594966, "learning_rate": 1.607301883786352e-05, "loss": 1.0554, "step": 2048 }, { "epoch": 0.31, "grad_norm": 3.0553939427426196, "learning_rate": 1.6069079469745232e-05, "loss": 0.7729, "step": 2049 }, { "epoch": 0.31, "grad_norm": 2.7411280804515896, "learning_rate": 1.6065138610017425e-05, "loss": 0.975, "step": 2050 }, { "epoch": 0.31, "grad_norm": 2.774742963619658, "learning_rate": 1.606119625964865e-05, "loss": 0.7768, "step": 2051 }, { "epoch": 0.31, "grad_norm": 38.98687845629915, "learning_rate": 1.605725241960783e-05, "loss": 1.1693, "step": 2052 }, { "epoch": 0.31, "grad_norm": 2.7458443144950424, "learning_rate": 1.6053307090864247e-05, "loss": 0.9069, "step": 2053 }, { "epoch": 0.31, "grad_norm": 2.919021481663924, "learning_rate": 1.604936027438755e-05, "loss": 0.9223, "step": 2054 }, { "epoch": 0.31, "grad_norm": 2.948893360420509, "learning_rate": 1.604541197114776e-05, "loss": 0.94, "step": 2055 }, { "epoch": 0.31, "grad_norm": 2.8116682169195295, "learning_rate": 1.6041462182115257e-05, "loss": 0.9397, "step": 2056 }, { "epoch": 0.31, "grad_norm": 2.701273752635258, "learning_rate": 1.603751090826079e-05, "loss": 0.9303, "step": 2057 }, { "epoch": 0.32, "grad_norm": 2.9204076878350715, "learning_rate": 1.6033558150555468e-05, "loss": 0.9636, "step": 2058 }, { "epoch": 0.32, "grad_norm": 2.9249074674731808, "learning_rate": 1.602960390997077e-05, "loss": 0.8089, "step": 2059 }, { "epoch": 0.32, "grad_norm": 2.8604826058914865, "learning_rate": 1.602564818747854e-05, "loss": 0.9502, "step": 2060 }, { "epoch": 0.32, "grad_norm": 2.642150556596377, "learning_rate": 1.602169098405098e-05, "loss": 0.8838, "step": 2061 }, { "epoch": 0.32, "grad_norm": 2.671658746125172, "learning_rate": 1.601773230066066e-05, "loss": 0.8714, "step": 2062 }, { "epoch": 0.32, "grad_norm": 3.0325050799154147, "learning_rate": 1.6013772138280516e-05, "loss": 0.945, "step": 2063 }, { "epoch": 0.32, "grad_norm": 3.0000570968848552, "learning_rate": 1.600981049788384e-05, "loss": 1.0131, "step": 2064 }, { "epoch": 0.32, "grad_norm": 2.904729128627338, "learning_rate": 1.6005847380444296e-05, "loss": 0.9075, "step": 2065 }, { "epoch": 0.32, "grad_norm": 2.716502377018324, "learning_rate": 1.6001882786935906e-05, "loss": 0.8516, "step": 2066 }, { "epoch": 0.32, "grad_norm": 3.0878573330752905, "learning_rate": 1.599791671833306e-05, "loss": 0.8876, "step": 2067 }, { "epoch": 0.32, "grad_norm": 3.0105827423255227, "learning_rate": 1.5993949175610496e-05, "loss": 0.9043, "step": 2068 }, { "epoch": 0.32, "grad_norm": 2.8610460528157593, "learning_rate": 1.5989980159743336e-05, "loss": 0.9194, "step": 2069 }, { "epoch": 0.32, "grad_norm": 3.2183249053149567, "learning_rate": 1.5986009671707048e-05, "loss": 1.097, "step": 2070 }, { "epoch": 0.32, "grad_norm": 2.866509114415062, "learning_rate": 1.5982037712477466e-05, "loss": 0.9069, "step": 2071 }, { "epoch": 0.32, "grad_norm": 2.9793432999723133, "learning_rate": 1.5978064283030784e-05, "loss": 1.0063, "step": 2072 }, { "epoch": 0.32, "grad_norm": 3.366780686052711, "learning_rate": 1.597408938434356e-05, "loss": 0.9817, "step": 2073 }, { "epoch": 0.32, "grad_norm": 2.7600076399826334, "learning_rate": 1.5970113017392724e-05, "loss": 0.9071, "step": 2074 }, { "epoch": 0.32, "grad_norm": 3.1655593506091955, "learning_rate": 1.5966135183155542e-05, "loss": 0.8892, "step": 2075 }, { "epoch": 0.32, "grad_norm": 3.04836429836989, "learning_rate": 1.5962155882609657e-05, "loss": 0.9741, "step": 2076 }, { "epoch": 0.32, "grad_norm": 8.977526763468514, "learning_rate": 1.5958175116733074e-05, "loss": 1.2192, "step": 2077 }, { "epoch": 0.32, "grad_norm": 2.726548999880068, "learning_rate": 1.595419288650415e-05, "loss": 0.9317, "step": 2078 }, { "epoch": 0.32, "grad_norm": 3.011217259674851, "learning_rate": 1.5950209192901603e-05, "loss": 0.9827, "step": 2079 }, { "epoch": 0.32, "grad_norm": 3.29525534444165, "learning_rate": 1.594622403690452e-05, "loss": 0.9244, "step": 2080 }, { "epoch": 0.32, "grad_norm": 3.229445003686063, "learning_rate": 1.5942237419492334e-05, "loss": 0.9362, "step": 2081 }, { "epoch": 0.32, "grad_norm": 2.9204486236688245, "learning_rate": 1.5938249341644847e-05, "loss": 0.9227, "step": 2082 }, { "epoch": 0.32, "grad_norm": 2.927607260082721, "learning_rate": 1.5934259804342218e-05, "loss": 0.9349, "step": 2083 }, { "epoch": 0.32, "grad_norm": 2.837295266420575, "learning_rate": 1.593026880856496e-05, "loss": 0.8942, "step": 2084 }, { "epoch": 0.32, "grad_norm": 2.651081252949924, "learning_rate": 1.5926276355293948e-05, "loss": 0.8587, "step": 2085 }, { "epoch": 0.32, "grad_norm": 8.857579788362404, "learning_rate": 1.5922282445510422e-05, "loss": 1.1825, "step": 2086 }, { "epoch": 0.32, "grad_norm": 2.641208085873871, "learning_rate": 1.5918287080195962e-05, "loss": 0.9027, "step": 2087 }, { "epoch": 0.32, "grad_norm": 2.80746351383302, "learning_rate": 1.5914290260332524e-05, "loss": 0.9674, "step": 2088 }, { "epoch": 0.32, "grad_norm": 3.0202119253982875, "learning_rate": 1.5910291986902415e-05, "loss": 1.0477, "step": 2089 }, { "epoch": 0.32, "grad_norm": 2.7121329518000694, "learning_rate": 1.590629226088829e-05, "loss": 0.9407, "step": 2090 }, { "epoch": 0.32, "grad_norm": 2.8600057410215216, "learning_rate": 1.5902291083273182e-05, "loss": 0.812, "step": 2091 }, { "epoch": 0.32, "grad_norm": 2.95858748031997, "learning_rate": 1.589828845504046e-05, "loss": 0.8556, "step": 2092 }, { "epoch": 0.32, "grad_norm": 5.746810792832964, "learning_rate": 1.589428437717386e-05, "loss": 1.1038, "step": 2093 }, { "epoch": 0.32, "grad_norm": 2.917450104375745, "learning_rate": 1.5890278850657468e-05, "loss": 0.963, "step": 2094 }, { "epoch": 0.32, "grad_norm": 3.148069985975481, "learning_rate": 1.5886271876475733e-05, "loss": 1.0287, "step": 2095 }, { "epoch": 0.32, "grad_norm": 2.919608954475873, "learning_rate": 1.588226345561346e-05, "loss": 1.0119, "step": 2096 }, { "epoch": 0.32, "grad_norm": 3.0119482294019226, "learning_rate": 1.5878253589055807e-05, "loss": 0.9433, "step": 2097 }, { "epoch": 0.32, "grad_norm": 2.745844362970861, "learning_rate": 1.587424227778828e-05, "loss": 0.9186, "step": 2098 }, { "epoch": 0.32, "grad_norm": 2.7795582081388304, "learning_rate": 1.587022952279675e-05, "loss": 0.939, "step": 2099 }, { "epoch": 0.32, "grad_norm": 2.9849086913750154, "learning_rate": 1.5866215325067442e-05, "loss": 0.9054, "step": 2100 }, { "epoch": 0.32, "grad_norm": 2.830979510479411, "learning_rate": 1.586219968558693e-05, "loss": 0.9651, "step": 2101 }, { "epoch": 0.32, "grad_norm": 2.921574916421984, "learning_rate": 1.5858182605342146e-05, "loss": 0.8421, "step": 2102 }, { "epoch": 0.32, "grad_norm": 2.8984836166465087, "learning_rate": 1.5854164085320375e-05, "loss": 0.9944, "step": 2103 }, { "epoch": 0.32, "grad_norm": 2.96415686502195, "learning_rate": 1.5850144126509263e-05, "loss": 0.8441, "step": 2104 }, { "epoch": 0.32, "grad_norm": 8.399398202622699, "learning_rate": 1.5846122729896796e-05, "loss": 1.0563, "step": 2105 }, { "epoch": 0.32, "grad_norm": 2.8074715800805694, "learning_rate": 1.5842099896471322e-05, "loss": 0.8831, "step": 2106 }, { "epoch": 0.32, "grad_norm": 3.036041778722464, "learning_rate": 1.583807562722154e-05, "loss": 0.9816, "step": 2107 }, { "epoch": 0.32, "grad_norm": 3.0284113302543822, "learning_rate": 1.5834049923136508e-05, "loss": 1.0098, "step": 2108 }, { "epoch": 0.32, "grad_norm": 2.7496789939771475, "learning_rate": 1.5830022785205623e-05, "loss": 0.8986, "step": 2109 }, { "epoch": 0.32, "grad_norm": 2.953041494056494, "learning_rate": 1.5825994214418646e-05, "loss": 0.8537, "step": 2110 }, { "epoch": 0.32, "grad_norm": 2.659977164215324, "learning_rate": 1.582196421176569e-05, "loss": 0.9785, "step": 2111 }, { "epoch": 0.32, "grad_norm": 3.204140063929111, "learning_rate": 1.5817932778237217e-05, "loss": 0.9483, "step": 2112 }, { "epoch": 0.32, "grad_norm": 2.6887868024183654, "learning_rate": 1.581389991482403e-05, "loss": 0.9564, "step": 2113 }, { "epoch": 0.32, "grad_norm": 2.7755813582969027, "learning_rate": 1.5809865622517303e-05, "loss": 0.9934, "step": 2114 }, { "epoch": 0.32, "grad_norm": 2.845892550045217, "learning_rate": 1.5805829902308555e-05, "loss": 0.9452, "step": 2115 }, { "epoch": 0.32, "grad_norm": 2.853050815785061, "learning_rate": 1.580179275518964e-05, "loss": 0.93, "step": 2116 }, { "epoch": 0.32, "grad_norm": 3.2733220558775726, "learning_rate": 1.5797754182152786e-05, "loss": 1.0112, "step": 2117 }, { "epoch": 0.32, "grad_norm": 2.979523192746589, "learning_rate": 1.579371418419056e-05, "loss": 0.8896, "step": 2118 }, { "epoch": 0.32, "grad_norm": 3.4450783956006923, "learning_rate": 1.578967276229588e-05, "loss": 1.1442, "step": 2119 }, { "epoch": 0.32, "grad_norm": 2.864541500830383, "learning_rate": 1.5785629917462005e-05, "loss": 0.8686, "step": 2120 }, { "epoch": 0.32, "grad_norm": 2.8665462714350847, "learning_rate": 1.5781585650682565e-05, "loss": 0.8522, "step": 2121 }, { "epoch": 0.32, "grad_norm": 7.2402800359131625, "learning_rate": 1.5777539962951526e-05, "loss": 1.0849, "step": 2122 }, { "epoch": 0.32, "grad_norm": 2.9401554666052516, "learning_rate": 1.5773492855263196e-05, "loss": 0.9797, "step": 2123 }, { "epoch": 0.33, "grad_norm": 2.826087551554652, "learning_rate": 1.5769444328612248e-05, "loss": 0.9366, "step": 2124 }, { "epoch": 0.33, "grad_norm": 2.9748307430738388, "learning_rate": 1.5765394383993693e-05, "loss": 0.9461, "step": 2125 }, { "epoch": 0.33, "grad_norm": 2.917612348057046, "learning_rate": 1.57613430224029e-05, "loss": 0.9599, "step": 2126 }, { "epoch": 0.33, "grad_norm": 2.9492782151559545, "learning_rate": 1.5757290244835566e-05, "loss": 0.9607, "step": 2127 }, { "epoch": 0.33, "grad_norm": 3.104337592341858, "learning_rate": 1.5753236052287766e-05, "loss": 0.9517, "step": 2128 }, { "epoch": 0.33, "grad_norm": 2.8612025981659652, "learning_rate": 1.57491804457559e-05, "loss": 1.0167, "step": 2129 }, { "epoch": 0.33, "grad_norm": 2.8123553472731198, "learning_rate": 1.5745123426236716e-05, "loss": 0.9264, "step": 2130 }, { "epoch": 0.33, "grad_norm": 2.830018363337432, "learning_rate": 1.5741064994727327e-05, "loss": 0.8245, "step": 2131 }, { "epoch": 0.33, "grad_norm": 2.8110191476042683, "learning_rate": 1.5737005152225176e-05, "loss": 0.8709, "step": 2132 }, { "epoch": 0.33, "grad_norm": 2.8502055155928923, "learning_rate": 1.573294389972806e-05, "loss": 1.0445, "step": 2133 }, { "epoch": 0.33, "grad_norm": 3.0421641138681763, "learning_rate": 1.5728881238234118e-05, "loss": 1.0078, "step": 2134 }, { "epoch": 0.33, "grad_norm": 2.80236736793243, "learning_rate": 1.572481716874184e-05, "loss": 0.9258, "step": 2135 }, { "epoch": 0.33, "grad_norm": 3.1396454859019043, "learning_rate": 1.572075169225006e-05, "loss": 1.0275, "step": 2136 }, { "epoch": 0.33, "grad_norm": 3.01442584800464, "learning_rate": 1.571668480975796e-05, "loss": 0.9114, "step": 2137 }, { "epoch": 0.33, "grad_norm": 2.9439289256223495, "learning_rate": 1.5712616522265062e-05, "loss": 0.9387, "step": 2138 }, { "epoch": 0.33, "grad_norm": 2.86630479136464, "learning_rate": 1.5708546830771242e-05, "loss": 0.8924, "step": 2139 }, { "epoch": 0.33, "grad_norm": 2.9053380514019573, "learning_rate": 1.5704475736276708e-05, "loss": 0.9631, "step": 2140 }, { "epoch": 0.33, "grad_norm": 3.368103684245844, "learning_rate": 1.5700403239782035e-05, "loss": 0.913, "step": 2141 }, { "epoch": 0.33, "grad_norm": 2.857579536349821, "learning_rate": 1.569632934228811e-05, "loss": 1.0226, "step": 2142 }, { "epoch": 0.33, "grad_norm": 3.0564847208325254, "learning_rate": 1.56922540447962e-05, "loss": 0.9765, "step": 2143 }, { "epoch": 0.33, "grad_norm": 2.9287560858591966, "learning_rate": 1.568817734830789e-05, "loss": 0.8979, "step": 2144 }, { "epoch": 0.33, "grad_norm": 2.8933213409616583, "learning_rate": 1.5684099253825117e-05, "loss": 0.9059, "step": 2145 }, { "epoch": 0.33, "grad_norm": 2.639794473491297, "learning_rate": 1.5680019762350162e-05, "loss": 0.9327, "step": 2146 }, { "epoch": 0.33, "grad_norm": 2.891538719719891, "learning_rate": 1.567593887488565e-05, "loss": 1.0255, "step": 2147 }, { "epoch": 0.33, "grad_norm": 2.8097333302096015, "learning_rate": 1.5671856592434557e-05, "loss": 0.9987, "step": 2148 }, { "epoch": 0.33, "grad_norm": 2.788275914343878, "learning_rate": 1.5667772916000182e-05, "loss": 0.8716, "step": 2149 }, { "epoch": 0.33, "grad_norm": 2.968827801790018, "learning_rate": 1.5663687846586183e-05, "loss": 0.8922, "step": 2150 }, { "epoch": 0.33, "grad_norm": 2.9789373637634182, "learning_rate": 1.5659601385196555e-05, "loss": 0.7631, "step": 2151 }, { "epoch": 0.33, "grad_norm": 2.641680662522926, "learning_rate": 1.565551353283564e-05, "loss": 0.8783, "step": 2152 }, { "epoch": 0.33, "grad_norm": 2.992765042257369, "learning_rate": 1.565142429050811e-05, "loss": 0.9534, "step": 2153 }, { "epoch": 0.33, "grad_norm": 2.912789857674366, "learning_rate": 1.564733365921899e-05, "loss": 0.9623, "step": 2154 }, { "epoch": 0.33, "grad_norm": 3.1939374684533623, "learning_rate": 1.564324163997364e-05, "loss": 0.9484, "step": 2155 }, { "epoch": 0.33, "grad_norm": 2.910399280383399, "learning_rate": 1.563914823377777e-05, "loss": 1.05, "step": 2156 }, { "epoch": 0.33, "grad_norm": 2.7829633029832346, "learning_rate": 1.5635053441637416e-05, "loss": 0.899, "step": 2157 }, { "epoch": 0.33, "grad_norm": 2.8483703755801475, "learning_rate": 1.563095726455897e-05, "loss": 0.9353, "step": 2158 }, { "epoch": 0.33, "grad_norm": 3.1605453107869805, "learning_rate": 1.5626859703549153e-05, "loss": 0.9756, "step": 2159 }, { "epoch": 0.33, "grad_norm": 2.6579454798900675, "learning_rate": 1.5622760759615033e-05, "loss": 0.9316, "step": 2160 }, { "epoch": 0.33, "grad_norm": 2.895988425803534, "learning_rate": 1.561866043376401e-05, "loss": 0.8316, "step": 2161 }, { "epoch": 0.33, "grad_norm": 3.1091023855621067, "learning_rate": 1.5614558727003838e-05, "loss": 0.8095, "step": 2162 }, { "epoch": 0.33, "grad_norm": 2.723661965840028, "learning_rate": 1.56104556403426e-05, "loss": 0.9086, "step": 2163 }, { "epoch": 0.33, "grad_norm": 3.1270182582775274, "learning_rate": 1.560635117478871e-05, "loss": 0.9554, "step": 2164 }, { "epoch": 0.33, "grad_norm": 2.9598489071966627, "learning_rate": 1.560224533135094e-05, "loss": 0.864, "step": 2165 }, { "epoch": 0.33, "grad_norm": 2.912429840064479, "learning_rate": 1.559813811103839e-05, "loss": 0.9377, "step": 2166 }, { "epoch": 0.33, "grad_norm": 2.8075984318631653, "learning_rate": 1.55940295148605e-05, "loss": 0.9434, "step": 2167 }, { "epoch": 0.33, "grad_norm": 2.9689893677610657, "learning_rate": 1.5589919543827038e-05, "loss": 0.8091, "step": 2168 }, { "epoch": 0.33, "grad_norm": 2.7527341533964913, "learning_rate": 1.5585808198948135e-05, "loss": 0.9535, "step": 2169 }, { "epoch": 0.33, "grad_norm": 3.240281727452343, "learning_rate": 1.5581695481234234e-05, "loss": 0.9207, "step": 2170 }, { "epoch": 0.33, "grad_norm": 2.9346697305866134, "learning_rate": 1.5577581391696125e-05, "loss": 0.9847, "step": 2171 }, { "epoch": 0.33, "grad_norm": 3.2363181840874184, "learning_rate": 1.5573465931344943e-05, "loss": 0.9466, "step": 2172 }, { "epoch": 0.33, "grad_norm": 3.1311202216405905, "learning_rate": 1.5569349101192147e-05, "loss": 0.7614, "step": 2173 }, { "epoch": 0.33, "grad_norm": 2.9897802189566147, "learning_rate": 1.556523090224954e-05, "loss": 0.9406, "step": 2174 }, { "epoch": 0.33, "grad_norm": 2.866293002770438, "learning_rate": 1.5561111335529262e-05, "loss": 0.856, "step": 2175 }, { "epoch": 0.33, "grad_norm": 2.7666487683268266, "learning_rate": 1.5556990402043785e-05, "loss": 0.8607, "step": 2176 }, { "epoch": 0.33, "grad_norm": 3.206477614388165, "learning_rate": 1.5552868102805914e-05, "loss": 0.9272, "step": 2177 }, { "epoch": 0.33, "grad_norm": 2.703502589931452, "learning_rate": 1.5548744438828806e-05, "loss": 0.8436, "step": 2178 }, { "epoch": 0.33, "grad_norm": 2.9045504062867398, "learning_rate": 1.5544619411125932e-05, "loss": 0.9542, "step": 2179 }, { "epoch": 0.33, "grad_norm": 3.4383119642842557, "learning_rate": 1.554049302071111e-05, "loss": 0.9043, "step": 2180 }, { "epoch": 0.33, "grad_norm": 2.749910787471238, "learning_rate": 1.5536365268598495e-05, "loss": 0.9428, "step": 2181 }, { "epoch": 0.33, "grad_norm": 3.0887495819218884, "learning_rate": 1.5532236155802568e-05, "loss": 0.8238, "step": 2182 }, { "epoch": 0.33, "grad_norm": 2.7762753591598526, "learning_rate": 1.5528105683338153e-05, "loss": 0.9144, "step": 2183 }, { "epoch": 0.33, "grad_norm": 2.8725126638936267, "learning_rate": 1.5523973852220403e-05, "loss": 0.8462, "step": 2184 }, { "epoch": 0.33, "grad_norm": 2.9885596304544615, "learning_rate": 1.5519840663464803e-05, "loss": 0.8969, "step": 2185 }, { "epoch": 0.33, "grad_norm": 3.473266378506294, "learning_rate": 1.5515706118087178e-05, "loss": 1.0156, "step": 2186 }, { "epoch": 0.33, "grad_norm": 7.417240186483622, "learning_rate": 1.5511570217103686e-05, "loss": 1.1446, "step": 2187 }, { "epoch": 0.33, "grad_norm": 3.084298206467197, "learning_rate": 1.550743296153081e-05, "loss": 0.9202, "step": 2188 }, { "epoch": 0.34, "grad_norm": 2.9819144441801195, "learning_rate": 1.5503294352385376e-05, "loss": 0.9559, "step": 2189 }, { "epoch": 0.34, "grad_norm": 8.69663067192699, "learning_rate": 1.5499154390684534e-05, "loss": 1.1093, "step": 2190 }, { "epoch": 0.34, "grad_norm": 2.9797669448476896, "learning_rate": 1.5495013077445773e-05, "loss": 0.9774, "step": 2191 }, { "epoch": 0.34, "grad_norm": 3.1250248267064844, "learning_rate": 1.5490870413686913e-05, "loss": 0.8908, "step": 2192 }, { "epoch": 0.34, "grad_norm": 4.978205398929475, "learning_rate": 1.54867264004261e-05, "loss": 1.0935, "step": 2193 }, { "epoch": 0.34, "grad_norm": 3.3254960105618383, "learning_rate": 1.5482581038681817e-05, "loss": 0.9489, "step": 2194 }, { "epoch": 0.34, "grad_norm": 3.423818665589391, "learning_rate": 1.5478434329472883e-05, "loss": 0.9161, "step": 2195 }, { "epoch": 0.34, "grad_norm": 4.997633325042334, "learning_rate": 1.5474286273818437e-05, "loss": 1.0483, "step": 2196 }, { "epoch": 0.34, "grad_norm": 2.901737431846852, "learning_rate": 1.547013687273796e-05, "loss": 0.8797, "step": 2197 }, { "epoch": 0.34, "grad_norm": 2.9155767889936506, "learning_rate": 1.5465986127251253e-05, "loss": 1.0351, "step": 2198 }, { "epoch": 0.34, "grad_norm": 2.893278694107767, "learning_rate": 1.546183403837845e-05, "loss": 0.9258, "step": 2199 }, { "epoch": 0.34, "grad_norm": 2.995767480013683, "learning_rate": 1.545768060714003e-05, "loss": 0.8585, "step": 2200 }, { "epoch": 0.34, "grad_norm": 2.9454418666606514, "learning_rate": 1.5453525834556784e-05, "loss": 0.9562, "step": 2201 }, { "epoch": 0.34, "grad_norm": 2.743360233087499, "learning_rate": 1.5449369721649835e-05, "loss": 0.8992, "step": 2202 }, { "epoch": 0.34, "grad_norm": 3.0252718804825993, "learning_rate": 1.5445212269440644e-05, "loss": 1.0418, "step": 2203 }, { "epoch": 0.34, "grad_norm": 2.8837846598545855, "learning_rate": 1.544105347895099e-05, "loss": 0.9565, "step": 2204 }, { "epoch": 0.34, "grad_norm": 3.2789960918949608, "learning_rate": 1.5436893351202993e-05, "loss": 0.9387, "step": 2205 }, { "epoch": 0.34, "grad_norm": 6.385976907084884, "learning_rate": 1.5432731887219094e-05, "loss": 1.1257, "step": 2206 }, { "epoch": 0.34, "grad_norm": 2.8727045249703735, "learning_rate": 1.5428569088022067e-05, "loss": 0.9819, "step": 2207 }, { "epoch": 0.34, "grad_norm": 2.9735127238030588, "learning_rate": 1.5424404954635e-05, "loss": 0.9494, "step": 2208 }, { "epoch": 0.34, "grad_norm": 3.0445408913692784, "learning_rate": 1.5420239488081335e-05, "loss": 0.8285, "step": 2209 }, { "epoch": 0.34, "grad_norm": 7.763826104396605, "learning_rate": 1.5416072689384818e-05, "loss": 1.1142, "step": 2210 }, { "epoch": 0.34, "grad_norm": 3.061026010845337, "learning_rate": 1.5411904559569536e-05, "loss": 0.8954, "step": 2211 }, { "epoch": 0.34, "grad_norm": 3.084396939692579, "learning_rate": 1.540773509965989e-05, "loss": 0.9391, "step": 2212 }, { "epoch": 0.34, "grad_norm": 2.8981176045057295, "learning_rate": 1.5403564310680627e-05, "loss": 0.841, "step": 2213 }, { "epoch": 0.34, "grad_norm": 2.7909013188405085, "learning_rate": 1.5399392193656802e-05, "loss": 0.9828, "step": 2214 }, { "epoch": 0.34, "grad_norm": 2.957934625908896, "learning_rate": 1.5395218749613808e-05, "loss": 0.9562, "step": 2215 }, { "epoch": 0.34, "grad_norm": 2.752532249725703, "learning_rate": 1.5391043979577364e-05, "loss": 0.9208, "step": 2216 }, { "epoch": 0.34, "grad_norm": 2.9615314376475936, "learning_rate": 1.5386867884573505e-05, "loss": 0.9258, "step": 2217 }, { "epoch": 0.34, "grad_norm": 3.010242558942028, "learning_rate": 1.53826904656286e-05, "loss": 0.9155, "step": 2218 }, { "epoch": 0.34, "grad_norm": 3.2902012119762647, "learning_rate": 1.537851172376934e-05, "loss": 0.9412, "step": 2219 }, { "epoch": 0.34, "grad_norm": 2.9225946084002095, "learning_rate": 1.5374331660022744e-05, "loss": 0.9045, "step": 2220 }, { "epoch": 0.34, "grad_norm": 2.939000118464009, "learning_rate": 1.537015027541616e-05, "loss": 0.9167, "step": 2221 }, { "epoch": 0.34, "grad_norm": 2.8952624644975042, "learning_rate": 1.5365967570977244e-05, "loss": 0.9866, "step": 2222 }, { "epoch": 0.34, "grad_norm": 2.9487056282258512, "learning_rate": 1.5361783547733997e-05, "loss": 0.9799, "step": 2223 }, { "epoch": 0.34, "grad_norm": 3.0272505909958296, "learning_rate": 1.5357598206714726e-05, "loss": 0.8923, "step": 2224 }, { "epoch": 0.34, "grad_norm": 2.9581269346386927, "learning_rate": 1.535341154894808e-05, "loss": 0.9712, "step": 2225 }, { "epoch": 0.34, "grad_norm": 2.6448689391213436, "learning_rate": 1.5349223575463015e-05, "loss": 0.9218, "step": 2226 }, { "epoch": 0.34, "grad_norm": 3.7704869063269126, "learning_rate": 1.534503428728882e-05, "loss": 0.9657, "step": 2227 }, { "epoch": 0.34, "grad_norm": 2.663305732820632, "learning_rate": 1.5340843685455105e-05, "loss": 0.8638, "step": 2228 }, { "epoch": 0.34, "grad_norm": 2.90419193223228, "learning_rate": 1.5336651770991806e-05, "loss": 0.9674, "step": 2229 }, { "epoch": 0.34, "grad_norm": 2.888348711422464, "learning_rate": 1.533245854492917e-05, "loss": 0.8858, "step": 2230 }, { "epoch": 0.34, "grad_norm": 28.443076215648286, "learning_rate": 1.5328264008297774e-05, "loss": 1.1028, "step": 2231 }, { "epoch": 0.34, "grad_norm": 2.67306035825745, "learning_rate": 1.5324068162128524e-05, "loss": 0.9006, "step": 2232 }, { "epoch": 0.34, "grad_norm": 2.883306393381524, "learning_rate": 1.5319871007452643e-05, "loss": 0.9638, "step": 2233 }, { "epoch": 0.34, "grad_norm": 2.9956980307536196, "learning_rate": 1.5315672545301668e-05, "loss": 0.8938, "step": 2234 }, { "epoch": 0.34, "grad_norm": 2.8465615901796655, "learning_rate": 1.531147277670746e-05, "loss": 0.9024, "step": 2235 }, { "epoch": 0.34, "grad_norm": 3.0133716494370693, "learning_rate": 1.5307271702702215e-05, "loss": 0.8917, "step": 2236 }, { "epoch": 0.34, "grad_norm": 2.7472224187281222, "learning_rate": 1.530306932431843e-05, "loss": 0.8313, "step": 2237 }, { "epoch": 0.34, "grad_norm": 2.9205674411797533, "learning_rate": 1.529886564258894e-05, "loss": 0.9551, "step": 2238 }, { "epoch": 0.34, "grad_norm": 2.8579615936508076, "learning_rate": 1.529466065854688e-05, "loss": 0.9034, "step": 2239 }, { "epoch": 0.34, "grad_norm": 3.077793196274853, "learning_rate": 1.5290454373225728e-05, "loss": 0.9184, "step": 2240 }, { "epoch": 0.34, "grad_norm": 2.8593478786344995, "learning_rate": 1.528624678765927e-05, "loss": 1.0188, "step": 2241 }, { "epoch": 0.34, "grad_norm": 2.786932569982691, "learning_rate": 1.528203790288161e-05, "loss": 0.8707, "step": 2242 }, { "epoch": 0.34, "grad_norm": 2.986711280532268, "learning_rate": 1.5277827719927168e-05, "loss": 1.0005, "step": 2243 }, { "epoch": 0.34, "grad_norm": 2.8440787062164885, "learning_rate": 1.5273616239830697e-05, "loss": 0.9213, "step": 2244 }, { "epoch": 0.34, "grad_norm": 7.778194347417419, "learning_rate": 1.5269403463627263e-05, "loss": 1.0728, "step": 2245 }, { "epoch": 0.34, "grad_norm": 3.2806136694782992, "learning_rate": 1.5265189392352238e-05, "loss": 0.9671, "step": 2246 }, { "epoch": 0.34, "grad_norm": 3.2636437257793274, "learning_rate": 1.5260974027041328e-05, "loss": 0.9756, "step": 2247 }, { "epoch": 0.34, "grad_norm": 2.7949362496071135, "learning_rate": 1.5256757368730557e-05, "loss": 1.0169, "step": 2248 }, { "epoch": 0.34, "grad_norm": 2.883127148241771, "learning_rate": 1.5252539418456253e-05, "loss": 0.9109, "step": 2249 }, { "epoch": 0.34, "grad_norm": 3.079927358289439, "learning_rate": 1.5248320177255074e-05, "loss": 0.9541, "step": 2250 }, { "epoch": 0.34, "grad_norm": 3.066797245053238, "learning_rate": 1.5244099646163987e-05, "loss": 1.009, "step": 2251 }, { "epoch": 0.34, "grad_norm": 3.196708284476865, "learning_rate": 1.5239877826220286e-05, "loss": 0.9755, "step": 2252 }, { "epoch": 0.34, "grad_norm": 2.972529111217515, "learning_rate": 1.5235654718461572e-05, "loss": 0.8432, "step": 2253 }, { "epoch": 0.35, "grad_norm": 2.6898726536996396, "learning_rate": 1.5231430323925768e-05, "loss": 0.8175, "step": 2254 }, { "epoch": 0.35, "grad_norm": 3.052173706002016, "learning_rate": 1.5227204643651112e-05, "loss": 0.979, "step": 2255 }, { "epoch": 0.35, "grad_norm": 3.370484652099961, "learning_rate": 1.5222977678676159e-05, "loss": 0.955, "step": 2256 }, { "epoch": 0.35, "grad_norm": 2.857037218718541, "learning_rate": 1.5218749430039772e-05, "loss": 0.9497, "step": 2257 }, { "epoch": 0.35, "grad_norm": 2.686950220905987, "learning_rate": 1.5214519898781141e-05, "loss": 0.8466, "step": 2258 }, { "epoch": 0.35, "grad_norm": 3.023274578834779, "learning_rate": 1.5210289085939769e-05, "loss": 1.0174, "step": 2259 }, { "epoch": 0.35, "grad_norm": 3.017154767031705, "learning_rate": 1.5206056992555465e-05, "loss": 0.9454, "step": 2260 }, { "epoch": 0.35, "grad_norm": 2.693819407507513, "learning_rate": 1.5201823619668362e-05, "loss": 0.9532, "step": 2261 }, { "epoch": 0.35, "grad_norm": 2.7126130445896273, "learning_rate": 1.5197588968318904e-05, "loss": 0.957, "step": 2262 }, { "epoch": 0.35, "grad_norm": 2.585559358851023, "learning_rate": 1.519335303954785e-05, "loss": 0.9274, "step": 2263 }, { "epoch": 0.35, "grad_norm": 2.9563349968700394, "learning_rate": 1.518911583439627e-05, "loss": 0.9926, "step": 2264 }, { "epoch": 0.35, "grad_norm": 3.3004301639856544, "learning_rate": 1.5184877353905556e-05, "loss": 0.946, "step": 2265 }, { "epoch": 0.35, "grad_norm": 2.7952021511451006, "learning_rate": 1.5180637599117401e-05, "loss": 0.942, "step": 2266 }, { "epoch": 0.35, "grad_norm": 3.0215029152891892, "learning_rate": 1.5176396571073821e-05, "loss": 0.9685, "step": 2267 }, { "epoch": 0.35, "grad_norm": 2.7965263658600503, "learning_rate": 1.517215427081714e-05, "loss": 0.9508, "step": 2268 }, { "epoch": 0.35, "grad_norm": 2.917787388458425, "learning_rate": 1.516791069939e-05, "loss": 0.9753, "step": 2269 }, { "epoch": 0.35, "grad_norm": 2.7820103402362046, "learning_rate": 1.5163665857835348e-05, "loss": 0.9348, "step": 2270 }, { "epoch": 0.35, "grad_norm": 3.0049793733461914, "learning_rate": 1.515941974719645e-05, "loss": 0.9459, "step": 2271 }, { "epoch": 0.35, "grad_norm": 2.9106205504262777, "learning_rate": 1.515517236851688e-05, "loss": 0.8804, "step": 2272 }, { "epoch": 0.35, "grad_norm": 2.9958370089998407, "learning_rate": 1.5150923722840523e-05, "loss": 0.9294, "step": 2273 }, { "epoch": 0.35, "grad_norm": 2.861271074958744, "learning_rate": 1.5146673811211576e-05, "loss": 0.9511, "step": 2274 }, { "epoch": 0.35, "grad_norm": 2.8958602073341666, "learning_rate": 1.5142422634674551e-05, "loss": 0.9407, "step": 2275 }, { "epoch": 0.35, "grad_norm": 2.847941228108326, "learning_rate": 1.5138170194274269e-05, "loss": 1.0268, "step": 2276 }, { "epoch": 0.35, "grad_norm": 2.902937511503127, "learning_rate": 1.5133916491055858e-05, "loss": 0.8619, "step": 2277 }, { "epoch": 0.35, "grad_norm": 2.941672149117542, "learning_rate": 1.512966152606476e-05, "loss": 1.0764, "step": 2278 }, { "epoch": 0.35, "grad_norm": 3.1262853598966927, "learning_rate": 1.5125405300346722e-05, "loss": 0.9714, "step": 2279 }, { "epoch": 0.35, "grad_norm": 2.502183056519954, "learning_rate": 1.5121147814947812e-05, "loss": 0.8329, "step": 2280 }, { "epoch": 0.35, "grad_norm": 2.779765611110997, "learning_rate": 1.5116889070914397e-05, "loss": 0.9328, "step": 2281 }, { "epoch": 0.35, "grad_norm": 3.347802675409558, "learning_rate": 1.5112629069293156e-05, "loss": 0.9771, "step": 2282 }, { "epoch": 0.35, "grad_norm": 2.7310814686797293, "learning_rate": 1.5108367811131079e-05, "loss": 0.8361, "step": 2283 }, { "epoch": 0.35, "grad_norm": 2.9784430820973946, "learning_rate": 1.5104105297475462e-05, "loss": 0.9534, "step": 2284 }, { "epoch": 0.35, "grad_norm": 2.90244730992472, "learning_rate": 1.5099841529373918e-05, "loss": 1.0294, "step": 2285 }, { "epoch": 0.35, "grad_norm": 5.741448217823855, "learning_rate": 1.5095576507874353e-05, "loss": 1.0374, "step": 2286 }, { "epoch": 0.35, "grad_norm": 2.84138712585894, "learning_rate": 1.5091310234024991e-05, "loss": 0.8247, "step": 2287 }, { "epoch": 0.35, "grad_norm": 3.113982915053296, "learning_rate": 1.5087042708874368e-05, "loss": 0.8825, "step": 2288 }, { "epoch": 0.35, "grad_norm": 2.9396202859487635, "learning_rate": 1.508277393347132e-05, "loss": 0.8591, "step": 2289 }, { "epoch": 0.35, "grad_norm": 2.804939430938009, "learning_rate": 1.5078503908864985e-05, "loss": 0.9577, "step": 2290 }, { "epoch": 0.35, "grad_norm": 3.106725332329504, "learning_rate": 1.5074232636104824e-05, "loss": 0.9256, "step": 2291 }, { "epoch": 0.35, "grad_norm": 2.8731329023678387, "learning_rate": 1.5069960116240597e-05, "loss": 0.8268, "step": 2292 }, { "epoch": 0.35, "grad_norm": 2.561527056009055, "learning_rate": 1.506568635032236e-05, "loss": 0.8696, "step": 2293 }, { "epoch": 0.35, "grad_norm": 3.293742479742836, "learning_rate": 1.5061411339400494e-05, "loss": 0.9943, "step": 2294 }, { "epoch": 0.35, "grad_norm": 3.2364700641093647, "learning_rate": 1.5057135084525671e-05, "loss": 1.0094, "step": 2295 }, { "epoch": 0.35, "grad_norm": 2.8922317115255423, "learning_rate": 1.5052857586748881e-05, "loss": 0.9652, "step": 2296 }, { "epoch": 0.35, "grad_norm": 2.7080024377532146, "learning_rate": 1.5048578847121405e-05, "loss": 0.8256, "step": 2297 }, { "epoch": 0.35, "grad_norm": 2.6647130744588314, "learning_rate": 1.5044298866694842e-05, "loss": 1.0209, "step": 2298 }, { "epoch": 0.35, "grad_norm": 2.7470063263170243, "learning_rate": 1.504001764652109e-05, "loss": 0.8917, "step": 2299 }, { "epoch": 0.35, "grad_norm": 2.8360347324676005, "learning_rate": 1.5035735187652353e-05, "loss": 0.9454, "step": 2300 }, { "epoch": 0.35, "grad_norm": 2.900545216220249, "learning_rate": 1.503145149114114e-05, "loss": 0.9046, "step": 2301 }, { "epoch": 0.35, "grad_norm": 2.8178594587932593, "learning_rate": 1.5027166558040262e-05, "loss": 1.0504, "step": 2302 }, { "epoch": 0.35, "grad_norm": 3.01761000475965, "learning_rate": 1.502288038940284e-05, "loss": 0.9777, "step": 2303 }, { "epoch": 0.35, "grad_norm": 2.979517837174316, "learning_rate": 1.5018592986282286e-05, "loss": 0.848, "step": 2304 }, { "epoch": 0.35, "grad_norm": 2.9140866574800333, "learning_rate": 1.5014304349732327e-05, "loss": 0.8894, "step": 2305 }, { "epoch": 0.35, "grad_norm": 2.8764694466178775, "learning_rate": 1.5010014480806994e-05, "loss": 1.0152, "step": 2306 }, { "epoch": 0.35, "grad_norm": 3.081285640863487, "learning_rate": 1.5005723380560613e-05, "loss": 0.7847, "step": 2307 }, { "epoch": 0.35, "grad_norm": 2.636984699828142, "learning_rate": 1.5001431050047814e-05, "loss": 0.9056, "step": 2308 }, { "epoch": 0.35, "grad_norm": 2.833963909345822, "learning_rate": 1.499713749032353e-05, "loss": 0.9704, "step": 2309 }, { "epoch": 0.35, "grad_norm": 3.0114563448994174, "learning_rate": 1.4992842702443005e-05, "loss": 0.982, "step": 2310 }, { "epoch": 0.35, "grad_norm": 2.7306229723513553, "learning_rate": 1.4988546687461774e-05, "loss": 0.8364, "step": 2311 }, { "epoch": 0.35, "grad_norm": 3.0296755610921164, "learning_rate": 1.4984249446435674e-05, "loss": 0.9381, "step": 2312 }, { "epoch": 0.35, "grad_norm": 2.991527609025026, "learning_rate": 1.4979950980420847e-05, "loss": 0.9938, "step": 2313 }, { "epoch": 0.35, "grad_norm": 2.9612173589174624, "learning_rate": 1.4975651290473741e-05, "loss": 0.7781, "step": 2314 }, { "epoch": 0.35, "grad_norm": 2.8810874165072233, "learning_rate": 1.4971350377651093e-05, "loss": 1.0572, "step": 2315 }, { "epoch": 0.35, "grad_norm": 2.8538670159087545, "learning_rate": 1.496704824300995e-05, "loss": 1.0219, "step": 2316 }, { "epoch": 0.35, "grad_norm": 3.0232911034956933, "learning_rate": 1.4962744887607654e-05, "loss": 0.9658, "step": 2317 }, { "epoch": 0.35, "grad_norm": 2.7966552102771116, "learning_rate": 1.4958440312501852e-05, "loss": 0.9018, "step": 2318 }, { "epoch": 0.35, "grad_norm": 3.008138253863501, "learning_rate": 1.4954134518750483e-05, "loss": 0.8631, "step": 2319 }, { "epoch": 0.36, "grad_norm": 2.99847234940024, "learning_rate": 1.4949827507411791e-05, "loss": 1.0202, "step": 2320 }, { "epoch": 0.36, "grad_norm": 3.105716574195463, "learning_rate": 1.4945519279544325e-05, "loss": 0.8817, "step": 2321 }, { "epoch": 0.36, "grad_norm": 2.613668461496793, "learning_rate": 1.4941209836206922e-05, "loss": 0.9624, "step": 2322 }, { "epoch": 0.36, "grad_norm": 2.8192085782602696, "learning_rate": 1.4936899178458724e-05, "loss": 0.976, "step": 2323 }, { "epoch": 0.36, "grad_norm": 2.931181999122721, "learning_rate": 1.4932587307359165e-05, "loss": 0.9704, "step": 2324 }, { "epoch": 0.36, "grad_norm": 2.767933016701002, "learning_rate": 1.4928274223967986e-05, "loss": 0.8961, "step": 2325 }, { "epoch": 0.36, "grad_norm": 2.7663837703593295, "learning_rate": 1.4923959929345225e-05, "loss": 0.9543, "step": 2326 }, { "epoch": 0.36, "grad_norm": 2.8819865289259616, "learning_rate": 1.4919644424551205e-05, "loss": 0.92, "step": 2327 }, { "epoch": 0.36, "grad_norm": 2.935705325811252, "learning_rate": 1.4915327710646568e-05, "loss": 0.8172, "step": 2328 }, { "epoch": 0.36, "grad_norm": 2.8807778391035526, "learning_rate": 1.4911009788692235e-05, "loss": 0.8079, "step": 2329 }, { "epoch": 0.36, "grad_norm": 2.7340974992817775, "learning_rate": 1.4906690659749426e-05, "loss": 0.9884, "step": 2330 }, { "epoch": 0.36, "grad_norm": 2.8010092611333235, "learning_rate": 1.4902370324879668e-05, "loss": 0.8415, "step": 2331 }, { "epoch": 0.36, "grad_norm": 2.805620053742415, "learning_rate": 1.4898048785144775e-05, "loss": 0.9433, "step": 2332 }, { "epoch": 0.36, "grad_norm": 2.806019089524397, "learning_rate": 1.4893726041606864e-05, "loss": 0.9695, "step": 2333 }, { "epoch": 0.36, "grad_norm": 2.8069677773537647, "learning_rate": 1.4889402095328343e-05, "loss": 0.8722, "step": 2334 }, { "epoch": 0.36, "grad_norm": 2.906550234653562, "learning_rate": 1.488507694737191e-05, "loss": 0.9039, "step": 2335 }, { "epoch": 0.36, "grad_norm": 3.1084072393256705, "learning_rate": 1.4880750598800574e-05, "loss": 0.9259, "step": 2336 }, { "epoch": 0.36, "grad_norm": 2.749123472026514, "learning_rate": 1.4876423050677627e-05, "loss": 0.9346, "step": 2337 }, { "epoch": 0.36, "grad_norm": 2.6330518662858595, "learning_rate": 1.4872094304066656e-05, "loss": 0.987, "step": 2338 }, { "epoch": 0.36, "grad_norm": 2.788749903404174, "learning_rate": 1.486776436003155e-05, "loss": 0.9606, "step": 2339 }, { "epoch": 0.36, "grad_norm": 2.7976341491384056, "learning_rate": 1.4863433219636488e-05, "loss": 0.9753, "step": 2340 }, { "epoch": 0.36, "grad_norm": 2.665942700415535, "learning_rate": 1.4859100883945936e-05, "loss": 0.9599, "step": 2341 }, { "epoch": 0.36, "grad_norm": 4.930986893091736, "learning_rate": 1.4854767354024668e-05, "loss": 0.8953, "step": 2342 }, { "epoch": 0.36, "grad_norm": 3.0580837922604696, "learning_rate": 1.4850432630937741e-05, "loss": 1.0139, "step": 2343 }, { "epoch": 0.36, "grad_norm": 2.8370525920262972, "learning_rate": 1.4846096715750509e-05, "loss": 0.898, "step": 2344 }, { "epoch": 0.36, "grad_norm": 3.0053150475311012, "learning_rate": 1.4841759609528619e-05, "loss": 0.9479, "step": 2345 }, { "epoch": 0.36, "grad_norm": 2.7670959422581003, "learning_rate": 1.4837421313338008e-05, "loss": 0.915, "step": 2346 }, { "epoch": 0.36, "grad_norm": 2.9217310890737367, "learning_rate": 1.4833081828244908e-05, "loss": 0.8599, "step": 2347 }, { "epoch": 0.36, "grad_norm": 3.490086661578173, "learning_rate": 1.4828741155315844e-05, "loss": 0.9828, "step": 2348 }, { "epoch": 0.36, "grad_norm": 2.849816628823393, "learning_rate": 1.4824399295617631e-05, "loss": 0.8926, "step": 2349 }, { "epoch": 0.36, "grad_norm": 2.999178133361301, "learning_rate": 1.4820056250217377e-05, "loss": 0.9283, "step": 2350 }, { "epoch": 0.36, "grad_norm": 3.2252969975357586, "learning_rate": 1.4815712020182482e-05, "loss": 0.8567, "step": 2351 }, { "epoch": 0.36, "grad_norm": 2.6421881777254463, "learning_rate": 1.4811366606580633e-05, "loss": 0.9335, "step": 2352 }, { "epoch": 0.36, "grad_norm": 2.859144819762544, "learning_rate": 1.480702001047981e-05, "loss": 0.9672, "step": 2353 }, { "epoch": 0.36, "grad_norm": 3.149151970612346, "learning_rate": 1.4802672232948287e-05, "loss": 0.9672, "step": 2354 }, { "epoch": 0.36, "grad_norm": 2.961148024285571, "learning_rate": 1.4798323275054627e-05, "loss": 0.8123, "step": 2355 }, { "epoch": 0.36, "grad_norm": 3.0213153897706237, "learning_rate": 1.4793973137867679e-05, "loss": 0.9199, "step": 2356 }, { "epoch": 0.36, "grad_norm": 2.6555091707488243, "learning_rate": 1.4789621822456585e-05, "loss": 0.8879, "step": 2357 }, { "epoch": 0.36, "grad_norm": 3.1922995389939093, "learning_rate": 1.4785269329890779e-05, "loss": 0.8563, "step": 2358 }, { "epoch": 0.36, "grad_norm": 2.877845027283451, "learning_rate": 1.4780915661239979e-05, "loss": 0.9435, "step": 2359 }, { "epoch": 0.36, "grad_norm": 2.9264636830785347, "learning_rate": 1.4776560817574192e-05, "loss": 0.9215, "step": 2360 }, { "epoch": 0.36, "grad_norm": 2.8938324963130237, "learning_rate": 1.4772204799963723e-05, "loss": 0.9374, "step": 2361 }, { "epoch": 0.36, "grad_norm": 3.039456953253997, "learning_rate": 1.4767847609479155e-05, "loss": 0.9589, "step": 2362 }, { "epoch": 0.36, "grad_norm": 2.699834713722586, "learning_rate": 1.4763489247191367e-05, "loss": 0.9505, "step": 2363 }, { "epoch": 0.36, "grad_norm": 2.894943910673886, "learning_rate": 1.4759129714171515e-05, "loss": 1.0437, "step": 2364 }, { "epoch": 0.36, "grad_norm": 2.7951532789142117, "learning_rate": 1.4754769011491052e-05, "loss": 0.8761, "step": 2365 }, { "epoch": 0.36, "grad_norm": 2.8583742591953425, "learning_rate": 1.4750407140221723e-05, "loss": 0.8651, "step": 2366 }, { "epoch": 0.36, "grad_norm": 2.8018366439100553, "learning_rate": 1.4746044101435546e-05, "loss": 0.9273, "step": 2367 }, { "epoch": 0.36, "grad_norm": 3.4891138367062005, "learning_rate": 1.4741679896204842e-05, "loss": 0.935, "step": 2368 }, { "epoch": 0.36, "grad_norm": 2.806867292338262, "learning_rate": 1.47373145256022e-05, "loss": 0.851, "step": 2369 }, { "epoch": 0.36, "grad_norm": 2.9532302608146273, "learning_rate": 1.4732947990700512e-05, "loss": 0.9913, "step": 2370 }, { "epoch": 0.36, "grad_norm": 2.777258410564699, "learning_rate": 1.4728580292572947e-05, "loss": 0.9188, "step": 2371 }, { "epoch": 0.36, "grad_norm": 2.8586548046452687, "learning_rate": 1.4724211432292965e-05, "loss": 1.0016, "step": 2372 }, { "epoch": 0.36, "grad_norm": 2.7528671137789376, "learning_rate": 1.4719841410934307e-05, "loss": 0.9907, "step": 2373 }, { "epoch": 0.36, "grad_norm": 2.711606229508736, "learning_rate": 1.4715470229571007e-05, "loss": 0.938, "step": 2374 }, { "epoch": 0.36, "grad_norm": 2.6358126001386037, "learning_rate": 1.4711097889277373e-05, "loss": 0.9436, "step": 2375 }, { "epoch": 0.36, "grad_norm": 3.3690839599101254, "learning_rate": 1.4706724391128004e-05, "loss": 0.8859, "step": 2376 }, { "epoch": 0.36, "grad_norm": 2.857511482655727, "learning_rate": 1.4702349736197787e-05, "loss": 0.995, "step": 2377 }, { "epoch": 0.36, "grad_norm": 2.657782775593164, "learning_rate": 1.4697973925561885e-05, "loss": 0.9377, "step": 2378 }, { "epoch": 0.36, "grad_norm": 3.0674842520445136, "learning_rate": 1.4693596960295754e-05, "loss": 0.8398, "step": 2379 }, { "epoch": 0.36, "grad_norm": 2.6013761676243523, "learning_rate": 1.4689218841475126e-05, "loss": 0.8126, "step": 2380 }, { "epoch": 0.36, "grad_norm": 2.864805727482045, "learning_rate": 1.468483957017602e-05, "loss": 0.8925, "step": 2381 }, { "epoch": 0.36, "grad_norm": 2.800285854028283, "learning_rate": 1.4680459147474739e-05, "loss": 0.8958, "step": 2382 }, { "epoch": 0.36, "grad_norm": 3.2943055652064226, "learning_rate": 1.4676077574447867e-05, "loss": 1.0121, "step": 2383 }, { "epoch": 0.36, "grad_norm": 2.8279058966790025, "learning_rate": 1.4671694852172276e-05, "loss": 0.8836, "step": 2384 }, { "epoch": 0.37, "grad_norm": 2.5618607083364355, "learning_rate": 1.4667310981725113e-05, "loss": 0.8828, "step": 2385 }, { "epoch": 0.37, "grad_norm": 2.919796341251823, "learning_rate": 1.4662925964183807e-05, "loss": 0.8746, "step": 2386 }, { "epoch": 0.37, "grad_norm": 2.5276980612176696, "learning_rate": 1.4658539800626078e-05, "loss": 0.8823, "step": 2387 }, { "epoch": 0.37, "grad_norm": 2.6098839300692678, "learning_rate": 1.4654152492129918e-05, "loss": 0.948, "step": 2388 }, { "epoch": 0.37, "grad_norm": 2.5115034465656607, "learning_rate": 1.4649764039773606e-05, "loss": 0.9205, "step": 2389 }, { "epoch": 0.37, "grad_norm": 2.731287358860139, "learning_rate": 1.4645374444635703e-05, "loss": 0.9242, "step": 2390 }, { "epoch": 0.37, "grad_norm": 3.1344035395663186, "learning_rate": 1.4640983707795042e-05, "loss": 0.8341, "step": 2391 }, { "epoch": 0.37, "grad_norm": 2.8115383914664194, "learning_rate": 1.463659183033075e-05, "loss": 0.8833, "step": 2392 }, { "epoch": 0.37, "grad_norm": 2.7697574743430575, "learning_rate": 1.4632198813322223e-05, "loss": 0.9018, "step": 2393 }, { "epoch": 0.37, "grad_norm": 2.83330791104321, "learning_rate": 1.4627804657849143e-05, "loss": 0.9454, "step": 2394 }, { "epoch": 0.37, "grad_norm": 2.775273132429049, "learning_rate": 1.462340936499147e-05, "loss": 0.8233, "step": 2395 }, { "epoch": 0.37, "grad_norm": 2.5958681049130883, "learning_rate": 1.4619012935829444e-05, "loss": 0.8597, "step": 2396 }, { "epoch": 0.37, "grad_norm": 2.8992675561202574, "learning_rate": 1.4614615371443583e-05, "loss": 0.9323, "step": 2397 }, { "epoch": 0.37, "grad_norm": 3.2927009271512806, "learning_rate": 1.4610216672914683e-05, "loss": 0.8577, "step": 2398 }, { "epoch": 0.37, "grad_norm": 2.9704436448904388, "learning_rate": 1.4605816841323827e-05, "loss": 0.997, "step": 2399 }, { "epoch": 0.37, "grad_norm": 7.353962376662028, "learning_rate": 1.4601415877752362e-05, "loss": 1.1038, "step": 2400 }, { "epoch": 0.37, "grad_norm": 2.7215183764031874, "learning_rate": 1.459701378328193e-05, "loss": 0.9097, "step": 2401 }, { "epoch": 0.37, "grad_norm": 2.933424502380309, "learning_rate": 1.4592610558994436e-05, "loss": 0.7999, "step": 2402 }, { "epoch": 0.37, "grad_norm": 2.926601447869054, "learning_rate": 1.4588206205972074e-05, "loss": 0.9358, "step": 2403 }, { "epoch": 0.37, "grad_norm": 2.9496117509119233, "learning_rate": 1.4583800725297303e-05, "loss": 0.9057, "step": 2404 }, { "epoch": 0.37, "grad_norm": 3.254611314774993, "learning_rate": 1.4579394118052874e-05, "loss": 0.7935, "step": 2405 }, { "epoch": 0.37, "grad_norm": 2.851822266069201, "learning_rate": 1.4574986385321803e-05, "loss": 1.0653, "step": 2406 }, { "epoch": 0.37, "grad_norm": 3.1348344649384035, "learning_rate": 1.457057752818739e-05, "loss": 0.8486, "step": 2407 }, { "epoch": 0.37, "grad_norm": 2.579387784667669, "learning_rate": 1.456616754773321e-05, "loss": 0.8526, "step": 2408 }, { "epoch": 0.37, "grad_norm": 3.0468680359639984, "learning_rate": 1.4561756445043104e-05, "loss": 0.9041, "step": 2409 }, { "epoch": 0.37, "grad_norm": 3.0982245467165415, "learning_rate": 1.4557344221201206e-05, "loss": 0.9566, "step": 2410 }, { "epoch": 0.37, "grad_norm": 2.9565652107469536, "learning_rate": 1.4552930877291915e-05, "loss": 1.0297, "step": 2411 }, { "epoch": 0.37, "grad_norm": 2.8739467747522607, "learning_rate": 1.4548516414399904e-05, "loss": 0.8787, "step": 2412 }, { "epoch": 0.37, "grad_norm": 2.6437720748691778, "learning_rate": 1.4544100833610132e-05, "loss": 0.9208, "step": 2413 }, { "epoch": 0.37, "grad_norm": 2.9485877441613635, "learning_rate": 1.4539684136007815e-05, "loss": 0.987, "step": 2414 }, { "epoch": 0.37, "grad_norm": 2.7206452418675786, "learning_rate": 1.4535266322678455e-05, "loss": 0.9639, "step": 2415 }, { "epoch": 0.37, "grad_norm": 3.0018829599804486, "learning_rate": 1.453084739470783e-05, "loss": 0.8887, "step": 2416 }, { "epoch": 0.37, "grad_norm": 2.9505090712062456, "learning_rate": 1.452642735318199e-05, "loss": 0.9423, "step": 2417 }, { "epoch": 0.37, "grad_norm": 2.9778830127369367, "learning_rate": 1.4522006199187254e-05, "loss": 0.9154, "step": 2418 }, { "epoch": 0.37, "grad_norm": 2.657643161124756, "learning_rate": 1.4517583933810219e-05, "loss": 0.9314, "step": 2419 }, { "epoch": 0.37, "grad_norm": 2.8830635226181176, "learning_rate": 1.4513160558137753e-05, "loss": 0.9039, "step": 2420 }, { "epoch": 0.37, "grad_norm": 3.050561549706014, "learning_rate": 1.4508736073256997e-05, "loss": 0.873, "step": 2421 }, { "epoch": 0.37, "grad_norm": 3.3887870469835066, "learning_rate": 1.450431048025537e-05, "loss": 0.8954, "step": 2422 }, { "epoch": 0.37, "grad_norm": 2.9279258595634317, "learning_rate": 1.4499883780220552e-05, "loss": 0.9408, "step": 2423 }, { "epoch": 0.37, "grad_norm": 3.0884637941438986, "learning_rate": 1.4495455974240507e-05, "loss": 0.8688, "step": 2424 }, { "epoch": 0.37, "grad_norm": 2.855867004287512, "learning_rate": 1.4491027063403462e-05, "loss": 0.9584, "step": 2425 }, { "epoch": 0.37, "grad_norm": 2.8528079362086025, "learning_rate": 1.4486597048797922e-05, "loss": 0.9052, "step": 2426 }, { "epoch": 0.37, "grad_norm": 2.8168958102750477, "learning_rate": 1.4482165931512655e-05, "loss": 0.9273, "step": 2427 }, { "epoch": 0.37, "grad_norm": 2.879624150643087, "learning_rate": 1.447773371263671e-05, "loss": 0.9153, "step": 2428 }, { "epoch": 0.37, "grad_norm": 3.6327227672284925, "learning_rate": 1.4473300393259404e-05, "loss": 0.9121, "step": 2429 }, { "epoch": 0.37, "grad_norm": 2.7857463283637314, "learning_rate": 1.4468865974470319e-05, "loss": 0.9462, "step": 2430 }, { "epoch": 0.37, "grad_norm": 2.8478943189794337, "learning_rate": 1.4464430457359308e-05, "loss": 0.9284, "step": 2431 }, { "epoch": 0.37, "grad_norm": 3.0260805043516834, "learning_rate": 1.4459993843016501e-05, "loss": 0.9206, "step": 2432 }, { "epoch": 0.37, "grad_norm": 2.77957086923648, "learning_rate": 1.4455556132532298e-05, "loss": 0.8637, "step": 2433 }, { "epoch": 0.37, "grad_norm": 2.844634762005502, "learning_rate": 1.4451117326997355e-05, "loss": 0.9007, "step": 2434 }, { "epoch": 0.37, "grad_norm": 2.7510660636079756, "learning_rate": 1.444667742750261e-05, "loss": 0.9752, "step": 2435 }, { "epoch": 0.37, "grad_norm": 2.729279351837006, "learning_rate": 1.4442236435139265e-05, "loss": 0.9481, "step": 2436 }, { "epoch": 0.37, "grad_norm": 2.791939618896989, "learning_rate": 1.4437794350998791e-05, "loss": 0.9578, "step": 2437 }, { "epoch": 0.37, "grad_norm": 2.8060592460027998, "learning_rate": 1.4433351176172925e-05, "loss": 0.8578, "step": 2438 }, { "epoch": 0.37, "grad_norm": 6.666012032688352, "learning_rate": 1.4428906911753679e-05, "loss": 1.0896, "step": 2439 }, { "epoch": 0.37, "grad_norm": 2.8237416644883946, "learning_rate": 1.4424461558833332e-05, "loss": 0.8734, "step": 2440 }, { "epoch": 0.37, "grad_norm": 2.6397829663448653, "learning_rate": 1.442001511850442e-05, "loss": 0.8617, "step": 2441 }, { "epoch": 0.37, "grad_norm": 2.6334892262776126, "learning_rate": 1.4415567591859753e-05, "loss": 0.7519, "step": 2442 }, { "epoch": 0.37, "grad_norm": 3.1957681156517204, "learning_rate": 1.4411118979992416e-05, "loss": 1.0172, "step": 2443 }, { "epoch": 0.37, "grad_norm": 5.69562700633565, "learning_rate": 1.4406669283995747e-05, "loss": 1.2042, "step": 2444 }, { "epoch": 0.37, "grad_norm": 3.060077822738249, "learning_rate": 1.4402218504963355e-05, "loss": 0.9895, "step": 2445 }, { "epoch": 0.37, "grad_norm": 2.7709821109960866, "learning_rate": 1.4397766643989123e-05, "loss": 0.8656, "step": 2446 }, { "epoch": 0.37, "grad_norm": 2.6858104251237664, "learning_rate": 1.4393313702167185e-05, "loss": 0.9585, "step": 2447 }, { "epoch": 0.37, "grad_norm": 2.9475283693916645, "learning_rate": 1.4388859680591957e-05, "loss": 0.7974, "step": 2448 }, { "epoch": 0.37, "grad_norm": 2.7918155152676922, "learning_rate": 1.4384404580358112e-05, "loss": 0.9301, "step": 2449 }, { "epoch": 0.38, "grad_norm": 2.6422292443267072, "learning_rate": 1.4379948402560581e-05, "loss": 0.8223, "step": 2450 }, { "epoch": 0.38, "grad_norm": 2.790804524918857, "learning_rate": 1.4375491148294578e-05, "loss": 0.9744, "step": 2451 }, { "epoch": 0.38, "grad_norm": 2.862406171144873, "learning_rate": 1.4371032818655564e-05, "loss": 0.837, "step": 2452 }, { "epoch": 0.38, "grad_norm": 2.772748271528916, "learning_rate": 1.4366573414739273e-05, "loss": 0.9354, "step": 2453 }, { "epoch": 0.38, "grad_norm": 2.5730897546706624, "learning_rate": 1.4362112937641702e-05, "loss": 0.9264, "step": 2454 }, { "epoch": 0.38, "grad_norm": 2.9648680762416584, "learning_rate": 1.435765138845911e-05, "loss": 0.9132, "step": 2455 }, { "epoch": 0.38, "grad_norm": 2.96891879680553, "learning_rate": 1.4353188768288022e-05, "loss": 0.771, "step": 2456 }, { "epoch": 0.38, "grad_norm": 3.027731011106476, "learning_rate": 1.4348725078225228e-05, "loss": 0.9532, "step": 2457 }, { "epoch": 0.38, "grad_norm": 2.9217523567988866, "learning_rate": 1.434426031936777e-05, "loss": 0.9065, "step": 2458 }, { "epoch": 0.38, "grad_norm": 2.902108117677878, "learning_rate": 1.4339794492812966e-05, "loss": 1.0494, "step": 2459 }, { "epoch": 0.38, "grad_norm": 3.0654388452077606, "learning_rate": 1.433532759965839e-05, "loss": 0.8442, "step": 2460 }, { "epoch": 0.38, "grad_norm": 2.934914525516883, "learning_rate": 1.433085964100188e-05, "loss": 0.8166, "step": 2461 }, { "epoch": 0.38, "grad_norm": 2.7968856258452566, "learning_rate": 1.4326390617941533e-05, "loss": 0.9282, "step": 2462 }, { "epoch": 0.38, "grad_norm": 2.7773284905132862, "learning_rate": 1.4321920531575708e-05, "loss": 0.9476, "step": 2463 }, { "epoch": 0.38, "grad_norm": 2.8341525979924986, "learning_rate": 1.4317449383003032e-05, "loss": 0.8395, "step": 2464 }, { "epoch": 0.38, "grad_norm": 2.7509817488619133, "learning_rate": 1.4312977173322384e-05, "loss": 1.0142, "step": 2465 }, { "epoch": 0.38, "grad_norm": 3.0310197179879648, "learning_rate": 1.430850390363291e-05, "loss": 0.8766, "step": 2466 }, { "epoch": 0.38, "grad_norm": 2.9366419645028676, "learning_rate": 1.430402957503401e-05, "loss": 1.0466, "step": 2467 }, { "epoch": 0.38, "grad_norm": 2.6504441981995255, "learning_rate": 1.4299554188625352e-05, "loss": 0.8638, "step": 2468 }, { "epoch": 0.38, "grad_norm": 2.9048530088304023, "learning_rate": 1.4295077745506865e-05, "loss": 0.9458, "step": 2469 }, { "epoch": 0.38, "grad_norm": 2.8117932431738164, "learning_rate": 1.4290600246778726e-05, "loss": 0.9039, "step": 2470 }, { "epoch": 0.38, "grad_norm": 2.6908191575161906, "learning_rate": 1.4286121693541378e-05, "loss": 0.9676, "step": 2471 }, { "epoch": 0.38, "grad_norm": 3.0444535921462332, "learning_rate": 1.4281642086895527e-05, "loss": 0.9215, "step": 2472 }, { "epoch": 0.38, "grad_norm": 3.0096304418790654, "learning_rate": 1.4277161427942137e-05, "loss": 0.9357, "step": 2473 }, { "epoch": 0.38, "grad_norm": 2.9660334572945746, "learning_rate": 1.427267971778242e-05, "loss": 0.9491, "step": 2474 }, { "epoch": 0.38, "grad_norm": 2.6068280607700136, "learning_rate": 1.4268196957517866e-05, "loss": 0.973, "step": 2475 }, { "epoch": 0.38, "grad_norm": 3.076600308765351, "learning_rate": 1.4263713148250203e-05, "loss": 0.85, "step": 2476 }, { "epoch": 0.38, "grad_norm": 3.127527029548337, "learning_rate": 1.4259228291081431e-05, "loss": 1.0009, "step": 2477 }, { "epoch": 0.38, "grad_norm": 2.796536067560663, "learning_rate": 1.4254742387113795e-05, "loss": 0.8847, "step": 2478 }, { "epoch": 0.38, "grad_norm": 2.7476099402360243, "learning_rate": 1.4250255437449812e-05, "loss": 0.9103, "step": 2479 }, { "epoch": 0.38, "grad_norm": 3.4654239989935136, "learning_rate": 1.4245767443192246e-05, "loss": 0.986, "step": 2480 }, { "epoch": 0.38, "grad_norm": 3.0750075287655423, "learning_rate": 1.424127840544412e-05, "loss": 0.924, "step": 2481 }, { "epoch": 0.38, "grad_norm": 2.649710861424403, "learning_rate": 1.423678832530871e-05, "loss": 0.8283, "step": 2482 }, { "epoch": 0.38, "grad_norm": 7.986231643973584, "learning_rate": 1.4232297203889556e-05, "loss": 1.1026, "step": 2483 }, { "epoch": 0.38, "grad_norm": 2.6902403214536084, "learning_rate": 1.4227805042290447e-05, "loss": 0.858, "step": 2484 }, { "epoch": 0.38, "grad_norm": 2.86598463124649, "learning_rate": 1.4223311841615435e-05, "loss": 0.9599, "step": 2485 }, { "epoch": 0.38, "grad_norm": 2.6725105892389482, "learning_rate": 1.421881760296882e-05, "loss": 0.9663, "step": 2486 }, { "epoch": 0.38, "grad_norm": 2.5386181291260144, "learning_rate": 1.4214322327455157e-05, "loss": 0.9455, "step": 2487 }, { "epoch": 0.38, "grad_norm": 2.9625059832666265, "learning_rate": 1.4209826016179263e-05, "loss": 1.0126, "step": 2488 }, { "epoch": 0.38, "grad_norm": 2.761055038983844, "learning_rate": 1.42053286702462e-05, "loss": 0.8517, "step": 2489 }, { "epoch": 0.38, "grad_norm": 2.8698649570185073, "learning_rate": 1.4200830290761295e-05, "loss": 0.9186, "step": 2490 }, { "epoch": 0.38, "grad_norm": 3.176874951282763, "learning_rate": 1.419633087883012e-05, "loss": 0.9243, "step": 2491 }, { "epoch": 0.38, "grad_norm": 2.7541805305928913, "learning_rate": 1.419183043555851e-05, "loss": 0.8945, "step": 2492 }, { "epoch": 0.38, "grad_norm": 2.7154264957294645, "learning_rate": 1.4187328962052536e-05, "loss": 0.9352, "step": 2493 }, { "epoch": 0.38, "grad_norm": 2.769275581735851, "learning_rate": 1.4182826459418543e-05, "loss": 0.9409, "step": 2494 }, { "epoch": 0.38, "grad_norm": 2.54796386871129, "learning_rate": 1.417832292876312e-05, "loss": 0.9307, "step": 2495 }, { "epoch": 0.38, "grad_norm": 2.869832085250058, "learning_rate": 1.4173818371193106e-05, "loss": 0.9986, "step": 2496 }, { "epoch": 0.38, "grad_norm": 2.737659375631931, "learning_rate": 1.41693127878156e-05, "loss": 0.8104, "step": 2497 }, { "epoch": 0.38, "grad_norm": 3.0286718411841465, "learning_rate": 1.4164806179737935e-05, "loss": 0.8746, "step": 2498 }, { "epoch": 0.38, "grad_norm": 2.940764782727735, "learning_rate": 1.4160298548067725e-05, "loss": 0.8917, "step": 2499 }, { "epoch": 0.38, "grad_norm": 2.896939827544822, "learning_rate": 1.4155789893912808e-05, "loss": 0.8441, "step": 2500 }, { "epoch": 0.38, "grad_norm": 2.5839952314690016, "learning_rate": 1.4151280218381287e-05, "loss": 0.9311, "step": 2501 }, { "epoch": 0.38, "grad_norm": 2.845449847456862, "learning_rate": 1.4146769522581519e-05, "loss": 0.897, "step": 2502 }, { "epoch": 0.38, "grad_norm": 2.6564675143300276, "learning_rate": 1.4142257807622103e-05, "loss": 0.8739, "step": 2503 }, { "epoch": 0.38, "grad_norm": 2.80321608838917, "learning_rate": 1.4137745074611888e-05, "loss": 0.9843, "step": 2504 }, { "epoch": 0.38, "grad_norm": 2.6055525554314296, "learning_rate": 1.4133231324659984e-05, "loss": 0.8857, "step": 2505 }, { "epoch": 0.38, "grad_norm": 2.7794862939293057, "learning_rate": 1.412871655887574e-05, "loss": 0.8227, "step": 2506 }, { "epoch": 0.38, "grad_norm": 2.860975874901178, "learning_rate": 1.412420077836876e-05, "loss": 0.9598, "step": 2507 }, { "epoch": 0.38, "grad_norm": 2.95525509948893, "learning_rate": 1.4119683984248898e-05, "loss": 0.9288, "step": 2508 }, { "epoch": 0.38, "grad_norm": 3.2260253091053848, "learning_rate": 1.4115166177626252e-05, "loss": 1.0039, "step": 2509 }, { "epoch": 0.38, "grad_norm": 2.8290548924846135, "learning_rate": 1.4110647359611175e-05, "loss": 0.8938, "step": 2510 }, { "epoch": 0.38, "grad_norm": 2.9476608734997245, "learning_rate": 1.4106127531314261e-05, "loss": 0.8894, "step": 2511 }, { "epoch": 0.38, "grad_norm": 2.652241950773867, "learning_rate": 1.4101606693846363e-05, "loss": 0.9125, "step": 2512 }, { "epoch": 0.38, "grad_norm": 2.7796707100209352, "learning_rate": 1.4097084848318574e-05, "loss": 0.9231, "step": 2513 }, { "epoch": 0.38, "grad_norm": 5.863374745844007, "learning_rate": 1.4092561995842238e-05, "loss": 1.0797, "step": 2514 }, { "epoch": 0.38, "grad_norm": 2.7617959711169444, "learning_rate": 1.4088038137528938e-05, "loss": 0.9663, "step": 2515 }, { "epoch": 0.39, "grad_norm": 3.163921586737289, "learning_rate": 1.408351327449052e-05, "loss": 0.9333, "step": 2516 }, { "epoch": 0.39, "grad_norm": 2.7792699930149816, "learning_rate": 1.4078987407839066e-05, "loss": 0.8374, "step": 2517 }, { "epoch": 0.39, "grad_norm": 2.7188267540892985, "learning_rate": 1.4074460538686908e-05, "loss": 0.8946, "step": 2518 }, { "epoch": 0.39, "grad_norm": 2.6558835918581876, "learning_rate": 1.4069932668146619e-05, "loss": 0.9045, "step": 2519 }, { "epoch": 0.39, "grad_norm": 2.866876242378881, "learning_rate": 1.406540379733103e-05, "loss": 0.8737, "step": 2520 }, { "epoch": 0.39, "grad_norm": 3.198276090274672, "learning_rate": 1.4060873927353203e-05, "loss": 0.9967, "step": 2521 }, { "epoch": 0.39, "grad_norm": 2.5484619529654458, "learning_rate": 1.4056343059326458e-05, "loss": 0.8782, "step": 2522 }, { "epoch": 0.39, "grad_norm": 2.743265614544366, "learning_rate": 1.405181119436435e-05, "loss": 0.8819, "step": 2523 }, { "epoch": 0.39, "grad_norm": 3.084943695798522, "learning_rate": 1.4047278333580689e-05, "loss": 0.8367, "step": 2524 }, { "epoch": 0.39, "grad_norm": 2.6568453931938603, "learning_rate": 1.4042744478089528e-05, "loss": 0.9544, "step": 2525 }, { "epoch": 0.39, "grad_norm": 2.9522303052046026, "learning_rate": 1.4038209629005156e-05, "loss": 0.8682, "step": 2526 }, { "epoch": 0.39, "grad_norm": 3.0082325997961306, "learning_rate": 1.4033673787442108e-05, "loss": 0.8472, "step": 2527 }, { "epoch": 0.39, "grad_norm": 2.5897995504801026, "learning_rate": 1.4029136954515175e-05, "loss": 0.9053, "step": 2528 }, { "epoch": 0.39, "grad_norm": 4.056262723244552, "learning_rate": 1.402459913133938e-05, "loss": 0.9708, "step": 2529 }, { "epoch": 0.39, "grad_norm": 2.830129743021095, "learning_rate": 1.4020060319029991e-05, "loss": 1.0032, "step": 2530 }, { "epoch": 0.39, "grad_norm": 2.6939067046894354, "learning_rate": 1.4015520518702526e-05, "loss": 0.8888, "step": 2531 }, { "epoch": 0.39, "grad_norm": 2.655218785250219, "learning_rate": 1.4010979731472731e-05, "loss": 0.8829, "step": 2532 }, { "epoch": 0.39, "grad_norm": 2.67010313060736, "learning_rate": 1.4006437958456616e-05, "loss": 0.8782, "step": 2533 }, { "epoch": 0.39, "grad_norm": 2.861776718201834, "learning_rate": 1.4001895200770412e-05, "loss": 0.9893, "step": 2534 }, { "epoch": 0.39, "grad_norm": 2.64690624389765, "learning_rate": 1.3997351459530605e-05, "loss": 0.8615, "step": 2535 }, { "epoch": 0.39, "grad_norm": 2.82273974702354, "learning_rate": 1.3992806735853924e-05, "loss": 1.0044, "step": 2536 }, { "epoch": 0.39, "grad_norm": 2.7952746643921755, "learning_rate": 1.3988261030857327e-05, "loss": 0.853, "step": 2537 }, { "epoch": 0.39, "grad_norm": 3.0034032826719534, "learning_rate": 1.3983714345658021e-05, "loss": 1.0211, "step": 2538 }, { "epoch": 0.39, "grad_norm": 2.683248570522212, "learning_rate": 1.3979166681373459e-05, "loss": 0.8442, "step": 2539 }, { "epoch": 0.39, "grad_norm": 2.8977292429823303, "learning_rate": 1.3974618039121326e-05, "loss": 0.8503, "step": 2540 }, { "epoch": 0.39, "grad_norm": 2.838726270766741, "learning_rate": 1.3970068420019552e-05, "loss": 0.9449, "step": 2541 }, { "epoch": 0.39, "grad_norm": 2.6119929718476276, "learning_rate": 1.3965517825186306e-05, "loss": 0.9552, "step": 2542 }, { "epoch": 0.39, "grad_norm": 2.691711091798615, "learning_rate": 1.3960966255739992e-05, "loss": 0.9775, "step": 2543 }, { "epoch": 0.39, "grad_norm": 2.886609800357804, "learning_rate": 1.3956413712799263e-05, "loss": 0.9099, "step": 2544 }, { "epoch": 0.39, "grad_norm": 2.5510696879764474, "learning_rate": 1.3951860197483008e-05, "loss": 0.8674, "step": 2545 }, { "epoch": 0.39, "grad_norm": 3.2071989200026714, "learning_rate": 1.3947305710910346e-05, "loss": 0.8818, "step": 2546 }, { "epoch": 0.39, "grad_norm": 2.7232029556025936, "learning_rate": 1.394275025420065e-05, "loss": 0.8767, "step": 2547 }, { "epoch": 0.39, "grad_norm": 2.822174025808589, "learning_rate": 1.3938193828473521e-05, "loss": 1.0398, "step": 2548 }, { "epoch": 0.39, "grad_norm": 2.91246822858765, "learning_rate": 1.3933636434848797e-05, "loss": 0.8926, "step": 2549 }, { "epoch": 0.39, "grad_norm": 2.7026549086967, "learning_rate": 1.3929078074446561e-05, "loss": 1.0069, "step": 2550 }, { "epoch": 0.39, "grad_norm": 3.3907508424871993, "learning_rate": 1.392451874838713e-05, "loss": 0.8861, "step": 2551 }, { "epoch": 0.39, "grad_norm": 3.365881535947219, "learning_rate": 1.3919958457791056e-05, "loss": 0.9715, "step": 2552 }, { "epoch": 0.39, "grad_norm": 2.9741631822721617, "learning_rate": 1.3915397203779138e-05, "loss": 0.8546, "step": 2553 }, { "epoch": 0.39, "grad_norm": 2.4551308205268096, "learning_rate": 1.3910834987472393e-05, "loss": 0.8067, "step": 2554 }, { "epoch": 0.39, "grad_norm": 2.7640869588849273, "learning_rate": 1.3906271809992093e-05, "loss": 0.8744, "step": 2555 }, { "epoch": 0.39, "grad_norm": 2.7444788971223937, "learning_rate": 1.3901707672459738e-05, "loss": 0.9381, "step": 2556 }, { "epoch": 0.39, "grad_norm": 2.8966581939330647, "learning_rate": 1.3897142575997062e-05, "loss": 0.8446, "step": 2557 }, { "epoch": 0.39, "grad_norm": 2.7516082432017703, "learning_rate": 1.3892576521726045e-05, "loss": 0.8426, "step": 2558 }, { "epoch": 0.39, "grad_norm": 2.7161833682050105, "learning_rate": 1.388800951076889e-05, "loss": 0.927, "step": 2559 }, { "epoch": 0.39, "grad_norm": 2.8069829382886025, "learning_rate": 1.3883441544248037e-05, "loss": 0.903, "step": 2560 }, { "epoch": 0.39, "grad_norm": 2.8060823802410213, "learning_rate": 1.3878872623286169e-05, "loss": 0.9407, "step": 2561 }, { "epoch": 0.39, "grad_norm": 3.1672323489059027, "learning_rate": 1.38743027490062e-05, "loss": 0.8627, "step": 2562 }, { "epoch": 0.39, "grad_norm": 2.9899067313211556, "learning_rate": 1.386973192253127e-05, "loss": 0.9436, "step": 2563 }, { "epoch": 0.39, "grad_norm": 2.742952405237023, "learning_rate": 1.3865160144984766e-05, "loss": 0.7929, "step": 2564 }, { "epoch": 0.39, "grad_norm": 2.709277125727227, "learning_rate": 1.38605874174903e-05, "loss": 0.8206, "step": 2565 }, { "epoch": 0.39, "grad_norm": 2.8380760313223443, "learning_rate": 1.3856013741171723e-05, "loss": 0.9272, "step": 2566 }, { "epoch": 0.39, "grad_norm": 2.557049466152109, "learning_rate": 1.3851439117153114e-05, "loss": 0.8834, "step": 2567 }, { "epoch": 0.39, "grad_norm": 2.674380736873026, "learning_rate": 1.3846863546558783e-05, "loss": 0.8271, "step": 2568 }, { "epoch": 0.39, "grad_norm": 2.729408030623172, "learning_rate": 1.3842287030513287e-05, "loss": 0.917, "step": 2569 }, { "epoch": 0.39, "grad_norm": 2.4925015801647987, "learning_rate": 1.3837709570141401e-05, "loss": 0.8981, "step": 2570 }, { "epoch": 0.39, "grad_norm": 2.881328573934881, "learning_rate": 1.3833131166568132e-05, "loss": 0.9229, "step": 2571 }, { "epoch": 0.39, "grad_norm": 2.6522807333490324, "learning_rate": 1.3828551820918726e-05, "loss": 1.0144, "step": 2572 }, { "epoch": 0.39, "grad_norm": 2.631217876018588, "learning_rate": 1.382397153431866e-05, "loss": 0.8376, "step": 2573 }, { "epoch": 0.39, "grad_norm": 2.752984398348073, "learning_rate": 1.3819390307893637e-05, "loss": 0.8932, "step": 2574 }, { "epoch": 0.39, "grad_norm": 2.6337997335427636, "learning_rate": 1.3814808142769596e-05, "loss": 0.8712, "step": 2575 }, { "epoch": 0.39, "grad_norm": 2.767756879305521, "learning_rate": 1.3810225040072702e-05, "loss": 0.8711, "step": 2576 }, { "epoch": 0.39, "grad_norm": 2.833671564740066, "learning_rate": 1.380564100092936e-05, "loss": 0.9122, "step": 2577 }, { "epoch": 0.39, "grad_norm": 2.6823193648331993, "learning_rate": 1.3801056026466187e-05, "loss": 0.9515, "step": 2578 }, { "epoch": 0.39, "grad_norm": 2.7052167356636985, "learning_rate": 1.3796470117810047e-05, "loss": 0.9358, "step": 2579 }, { "epoch": 0.39, "grad_norm": 2.7684445261992323, "learning_rate": 1.3791883276088032e-05, "loss": 0.9435, "step": 2580 }, { "epoch": 0.4, "grad_norm": 2.7770127368337647, "learning_rate": 1.3787295502427456e-05, "loss": 1.0437, "step": 2581 }, { "epoch": 0.4, "grad_norm": 2.7540315264272577, "learning_rate": 1.3782706797955862e-05, "loss": 0.8721, "step": 2582 }, { "epoch": 0.4, "grad_norm": 2.6708408424496337, "learning_rate": 1.3778117163801026e-05, "loss": 0.8377, "step": 2583 }, { "epoch": 0.4, "grad_norm": 2.4750890755484996, "learning_rate": 1.3773526601090953e-05, "loss": 0.9632, "step": 2584 }, { "epoch": 0.4, "grad_norm": 2.8539918952627445, "learning_rate": 1.3768935110953876e-05, "loss": 0.8852, "step": 2585 }, { "epoch": 0.4, "grad_norm": 2.7756479228382283, "learning_rate": 1.376434269451825e-05, "loss": 0.9469, "step": 2586 }, { "epoch": 0.4, "grad_norm": 2.8092528873183813, "learning_rate": 1.3759749352912766e-05, "loss": 0.8656, "step": 2587 }, { "epoch": 0.4, "grad_norm": 2.7460581990366535, "learning_rate": 1.3755155087266339e-05, "loss": 0.9428, "step": 2588 }, { "epoch": 0.4, "grad_norm": 2.6634448363350254, "learning_rate": 1.3750559898708104e-05, "loss": 0.7914, "step": 2589 }, { "epoch": 0.4, "grad_norm": 3.3016500768582326, "learning_rate": 1.3745963788367438e-05, "loss": 0.9247, "step": 2590 }, { "epoch": 0.4, "grad_norm": 2.8604673823201012, "learning_rate": 1.3741366757373928e-05, "loss": 0.7247, "step": 2591 }, { "epoch": 0.4, "grad_norm": 2.634064735941571, "learning_rate": 1.3736768806857405e-05, "loss": 0.7969, "step": 2592 }, { "epoch": 0.4, "grad_norm": 2.6425265646373126, "learning_rate": 1.373216993794791e-05, "loss": 0.7945, "step": 2593 }, { "epoch": 0.4, "grad_norm": 2.8211610865763577, "learning_rate": 1.3727570151775716e-05, "loss": 0.9039, "step": 2594 }, { "epoch": 0.4, "grad_norm": 2.569205222997264, "learning_rate": 1.3722969449471319e-05, "loss": 0.9151, "step": 2595 }, { "epoch": 0.4, "grad_norm": 2.716633530129442, "learning_rate": 1.3718367832165451e-05, "loss": 0.8682, "step": 2596 }, { "epoch": 0.4, "grad_norm": 2.9394906838196153, "learning_rate": 1.3713765300989053e-05, "loss": 0.8901, "step": 2597 }, { "epoch": 0.4, "grad_norm": 2.8753101369751275, "learning_rate": 1.37091618570733e-05, "loss": 1.0159, "step": 2598 }, { "epoch": 0.4, "grad_norm": 2.6976133669347346, "learning_rate": 1.3704557501549594e-05, "loss": 0.8259, "step": 2599 }, { "epoch": 0.4, "grad_norm": 2.9676339712488287, "learning_rate": 1.3699952235549547e-05, "loss": 0.8872, "step": 2600 }, { "epoch": 0.4, "grad_norm": 3.015750805388782, "learning_rate": 1.369534606020501e-05, "loss": 0.9446, "step": 2601 }, { "epoch": 0.4, "grad_norm": 3.0731985649847657, "learning_rate": 1.3690738976648053e-05, "loss": 1.0013, "step": 2602 }, { "epoch": 0.4, "grad_norm": 3.163854041137835, "learning_rate": 1.3686130986010965e-05, "loss": 0.9987, "step": 2603 }, { "epoch": 0.4, "grad_norm": 2.706206243594574, "learning_rate": 1.3681522089426265e-05, "loss": 0.8524, "step": 2604 }, { "epoch": 0.4, "grad_norm": 2.575483493975654, "learning_rate": 1.3676912288026685e-05, "loss": 0.7937, "step": 2605 }, { "epoch": 0.4, "grad_norm": 2.9879921457113077, "learning_rate": 1.3672301582945187e-05, "loss": 0.8572, "step": 2606 }, { "epoch": 0.4, "grad_norm": 2.853495583921802, "learning_rate": 1.3667689975314955e-05, "loss": 0.98, "step": 2607 }, { "epoch": 0.4, "grad_norm": 2.90434906178557, "learning_rate": 1.366307746626939e-05, "loss": 0.8291, "step": 2608 }, { "epoch": 0.4, "grad_norm": 2.837540695181841, "learning_rate": 1.365846405694212e-05, "loss": 0.8312, "step": 2609 }, { "epoch": 0.4, "grad_norm": 6.641247798531113, "learning_rate": 1.3653849748466991e-05, "loss": 1.1737, "step": 2610 }, { "epoch": 0.4, "grad_norm": 2.845182795899973, "learning_rate": 1.364923454197807e-05, "loss": 0.9513, "step": 2611 }, { "epoch": 0.4, "grad_norm": 2.8796126601879175, "learning_rate": 1.3644618438609643e-05, "loss": 0.8865, "step": 2612 }, { "epoch": 0.4, "grad_norm": 2.8526823560302756, "learning_rate": 1.364000143949622e-05, "loss": 0.9331, "step": 2613 }, { "epoch": 0.4, "grad_norm": 4.3528468609485875, "learning_rate": 1.3635383545772534e-05, "loss": 0.8366, "step": 2614 }, { "epoch": 0.4, "grad_norm": 3.41234900636786, "learning_rate": 1.3630764758573529e-05, "loss": 0.834, "step": 2615 }, { "epoch": 0.4, "grad_norm": 2.988943225087445, "learning_rate": 1.3626145079034374e-05, "loss": 0.9219, "step": 2616 }, { "epoch": 0.4, "grad_norm": 2.9776376496628636, "learning_rate": 1.3621524508290457e-05, "loss": 0.9286, "step": 2617 }, { "epoch": 0.4, "grad_norm": 3.229730135176999, "learning_rate": 1.361690304747739e-05, "loss": 0.9246, "step": 2618 }, { "epoch": 0.4, "grad_norm": 2.9490639624794284, "learning_rate": 1.361228069773099e-05, "loss": 0.9191, "step": 2619 }, { "epoch": 0.4, "grad_norm": 2.9437329662922713, "learning_rate": 1.3607657460187307e-05, "loss": 0.9408, "step": 2620 }, { "epoch": 0.4, "grad_norm": 2.5307046738228607, "learning_rate": 1.36030333359826e-05, "loss": 0.8944, "step": 2621 }, { "epoch": 0.4, "grad_norm": 2.6236321071528637, "learning_rate": 1.3598408326253348e-05, "loss": 0.8936, "step": 2622 }, { "epoch": 0.4, "grad_norm": 3.0871202845956263, "learning_rate": 1.3593782432136251e-05, "loss": 0.9609, "step": 2623 }, { "epoch": 0.4, "grad_norm": 2.6858071831140933, "learning_rate": 1.3589155654768224e-05, "loss": 0.8933, "step": 2624 }, { "epoch": 0.4, "grad_norm": 8.045716570735623, "learning_rate": 1.35845279952864e-05, "loss": 1.1436, "step": 2625 }, { "epoch": 0.4, "grad_norm": 2.8137361622080816, "learning_rate": 1.3579899454828126e-05, "loss": 0.9405, "step": 2626 }, { "epoch": 0.4, "grad_norm": 2.8962261675877774, "learning_rate": 1.3575270034530967e-05, "loss": 0.8779, "step": 2627 }, { "epoch": 0.4, "grad_norm": 2.6786738209282155, "learning_rate": 1.3570639735532707e-05, "loss": 0.8522, "step": 2628 }, { "epoch": 0.4, "grad_norm": 2.8984792337350838, "learning_rate": 1.3566008558971342e-05, "loss": 0.8925, "step": 2629 }, { "epoch": 0.4, "grad_norm": 2.6947101762433547, "learning_rate": 1.3561376505985085e-05, "loss": 0.8026, "step": 2630 }, { "epoch": 0.4, "grad_norm": 2.7174665627606913, "learning_rate": 1.3556743577712363e-05, "loss": 0.929, "step": 2631 }, { "epoch": 0.4, "grad_norm": 2.8030923315646157, "learning_rate": 1.3552109775291828e-05, "loss": 0.9614, "step": 2632 }, { "epoch": 0.4, "grad_norm": 3.067036667463295, "learning_rate": 1.354747509986233e-05, "loss": 0.8985, "step": 2633 }, { "epoch": 0.4, "grad_norm": 2.737467558758066, "learning_rate": 1.3542839552562945e-05, "loss": 0.9098, "step": 2634 }, { "epoch": 0.4, "grad_norm": 2.9905535153970537, "learning_rate": 1.353820313453296e-05, "loss": 0.8895, "step": 2635 }, { "epoch": 0.4, "grad_norm": 2.8996950341165877, "learning_rate": 1.353356584691188e-05, "loss": 0.9421, "step": 2636 }, { "epoch": 0.4, "grad_norm": 3.1401490422536424, "learning_rate": 1.3528927690839414e-05, "loss": 0.8608, "step": 2637 }, { "epoch": 0.4, "grad_norm": 2.7015344706197664, "learning_rate": 1.3524288667455497e-05, "loss": 0.9702, "step": 2638 }, { "epoch": 0.4, "grad_norm": 2.580217257775127, "learning_rate": 1.3519648777900264e-05, "loss": 0.8723, "step": 2639 }, { "epoch": 0.4, "grad_norm": 3.0334489963523077, "learning_rate": 1.3515008023314077e-05, "loss": 0.9329, "step": 2640 }, { "epoch": 0.4, "grad_norm": 2.94368863839663, "learning_rate": 1.3510366404837499e-05, "loss": 0.8787, "step": 2641 }, { "epoch": 0.4, "grad_norm": 2.4523195891235248, "learning_rate": 1.3505723923611309e-05, "loss": 0.85, "step": 2642 }, { "epoch": 0.4, "grad_norm": 2.845812910882723, "learning_rate": 1.3501080580776504e-05, "loss": 0.9852, "step": 2643 }, { "epoch": 0.4, "grad_norm": 2.6528192407446602, "learning_rate": 1.3496436377474282e-05, "loss": 0.8928, "step": 2644 }, { "epoch": 0.4, "grad_norm": 2.815621799890215, "learning_rate": 1.3491791314846059e-05, "loss": 0.857, "step": 2645 }, { "epoch": 0.41, "grad_norm": 3.398570829960979, "learning_rate": 1.348714539403346e-05, "loss": 0.8943, "step": 2646 }, { "epoch": 0.41, "grad_norm": 2.7354191271811326, "learning_rate": 1.3482498616178329e-05, "loss": 0.9616, "step": 2647 }, { "epoch": 0.41, "grad_norm": 3.0440592594486175, "learning_rate": 1.3477850982422704e-05, "loss": 0.8634, "step": 2648 }, { "epoch": 0.41, "grad_norm": 2.6829014105178377, "learning_rate": 1.3473202493908847e-05, "loss": 0.9402, "step": 2649 }, { "epoch": 0.41, "grad_norm": 2.6462089340973725, "learning_rate": 1.3468553151779229e-05, "loss": 0.7961, "step": 2650 }, { "epoch": 0.41, "grad_norm": 2.9826913776524546, "learning_rate": 1.3463902957176526e-05, "loss": 0.8488, "step": 2651 }, { "epoch": 0.41, "grad_norm": 2.8634755106514653, "learning_rate": 1.3459251911243623e-05, "loss": 0.8495, "step": 2652 }, { "epoch": 0.41, "grad_norm": 2.6810739867840625, "learning_rate": 1.345460001512362e-05, "loss": 1.0048, "step": 2653 }, { "epoch": 0.41, "grad_norm": 2.8634739647852445, "learning_rate": 1.344994726995982e-05, "loss": 0.9138, "step": 2654 }, { "epoch": 0.41, "grad_norm": 3.0049448169785546, "learning_rate": 1.3445293676895742e-05, "loss": 0.9287, "step": 2655 }, { "epoch": 0.41, "grad_norm": 2.8368502452839635, "learning_rate": 1.34406392370751e-05, "loss": 0.8992, "step": 2656 }, { "epoch": 0.41, "grad_norm": 2.7559802643598608, "learning_rate": 1.3435983951641831e-05, "loss": 0.844, "step": 2657 }, { "epoch": 0.41, "grad_norm": 3.1639489232846976, "learning_rate": 1.3431327821740074e-05, "loss": 0.9783, "step": 2658 }, { "epoch": 0.41, "grad_norm": 2.5013584115803975, "learning_rate": 1.3426670848514172e-05, "loss": 0.8197, "step": 2659 }, { "epoch": 0.41, "grad_norm": 2.6903599600202748, "learning_rate": 1.3422013033108683e-05, "loss": 0.8648, "step": 2660 }, { "epoch": 0.41, "grad_norm": 2.889426640643883, "learning_rate": 1.341735437666836e-05, "loss": 0.9477, "step": 2661 }, { "epoch": 0.41, "grad_norm": 2.561490050920701, "learning_rate": 1.341269488033818e-05, "loss": 0.8577, "step": 2662 }, { "epoch": 0.41, "grad_norm": 2.93139917411127, "learning_rate": 1.3408034545263307e-05, "loss": 0.8558, "step": 2663 }, { "epoch": 0.41, "grad_norm": 2.8483228045095426, "learning_rate": 1.3403373372589126e-05, "loss": 0.8472, "step": 2664 }, { "epoch": 0.41, "grad_norm": 2.8900801444678326, "learning_rate": 1.339871136346122e-05, "loss": 0.9375, "step": 2665 }, { "epoch": 0.41, "grad_norm": 2.761176789590491, "learning_rate": 1.3394048519025385e-05, "loss": 0.88, "step": 2666 }, { "epoch": 0.41, "grad_norm": 2.995974666022481, "learning_rate": 1.3389384840427609e-05, "loss": 0.9282, "step": 2667 }, { "epoch": 0.41, "grad_norm": 2.795462437751262, "learning_rate": 1.3384720328814101e-05, "loss": 0.8563, "step": 2668 }, { "epoch": 0.41, "grad_norm": 2.8992183719809117, "learning_rate": 1.338005498533126e-05, "loss": 0.9361, "step": 2669 }, { "epoch": 0.41, "grad_norm": 2.6516545078942926, "learning_rate": 1.3375388811125707e-05, "loss": 0.9369, "step": 2670 }, { "epoch": 0.41, "grad_norm": 2.369168985699152, "learning_rate": 1.337072180734425e-05, "loss": 0.7759, "step": 2671 }, { "epoch": 0.41, "grad_norm": 3.088417388011945, "learning_rate": 1.3366053975133904e-05, "loss": 0.8851, "step": 2672 }, { "epoch": 0.41, "grad_norm": 2.9663036317798768, "learning_rate": 1.3361385315641898e-05, "loss": 0.9257, "step": 2673 }, { "epoch": 0.41, "grad_norm": 3.1256307601041797, "learning_rate": 1.3356715830015652e-05, "loss": 0.9925, "step": 2674 }, { "epoch": 0.41, "grad_norm": 7.090405001302476, "learning_rate": 1.3352045519402799e-05, "loss": 1.1464, "step": 2675 }, { "epoch": 0.41, "grad_norm": 2.604324707239911, "learning_rate": 1.3347374384951171e-05, "loss": 0.8574, "step": 2676 }, { "epoch": 0.41, "grad_norm": 2.7363852452328645, "learning_rate": 1.33427024278088e-05, "loss": 0.8872, "step": 2677 }, { "epoch": 0.41, "grad_norm": 3.7479083095110153, "learning_rate": 1.333802964912392e-05, "loss": 0.8885, "step": 2678 }, { "epoch": 0.41, "grad_norm": 2.7348959309497, "learning_rate": 1.333335605004497e-05, "loss": 0.8658, "step": 2679 }, { "epoch": 0.41, "grad_norm": 3.015353432771644, "learning_rate": 1.332868163172059e-05, "loss": 0.949, "step": 2680 }, { "epoch": 0.41, "grad_norm": 2.513831609319815, "learning_rate": 1.3324006395299624e-05, "loss": 0.8491, "step": 2681 }, { "epoch": 0.41, "grad_norm": 2.646123238994641, "learning_rate": 1.3319330341931112e-05, "loss": 0.8096, "step": 2682 }, { "epoch": 0.41, "grad_norm": 2.870007829546668, "learning_rate": 1.3314653472764293e-05, "loss": 0.8376, "step": 2683 }, { "epoch": 0.41, "grad_norm": 5.54424602096051, "learning_rate": 1.3309975788948616e-05, "loss": 1.0279, "step": 2684 }, { "epoch": 0.41, "grad_norm": 2.650425831800214, "learning_rate": 1.330529729163372e-05, "loss": 0.8417, "step": 2685 }, { "epoch": 0.41, "grad_norm": 2.793823761613536, "learning_rate": 1.330061798196945e-05, "loss": 0.858, "step": 2686 }, { "epoch": 0.41, "grad_norm": 3.0661166656301595, "learning_rate": 1.3295937861105848e-05, "loss": 0.8369, "step": 2687 }, { "epoch": 0.41, "grad_norm": 2.8365228650619096, "learning_rate": 1.3291256930193164e-05, "loss": 0.8982, "step": 2688 }, { "epoch": 0.41, "grad_norm": 2.829947055923032, "learning_rate": 1.3286575190381828e-05, "loss": 0.9457, "step": 2689 }, { "epoch": 0.41, "grad_norm": 2.8245219608219343, "learning_rate": 1.3281892642822488e-05, "loss": 0.8713, "step": 2690 }, { "epoch": 0.41, "grad_norm": 2.7515076639581935, "learning_rate": 1.3277209288665977e-05, "loss": 0.8659, "step": 2691 }, { "epoch": 0.41, "grad_norm": 3.039281991650958, "learning_rate": 1.3272525129063339e-05, "loss": 0.9434, "step": 2692 }, { "epoch": 0.41, "grad_norm": 2.78357479127376, "learning_rate": 1.3267840165165802e-05, "loss": 0.9466, "step": 2693 }, { "epoch": 0.41, "grad_norm": 2.708980316331865, "learning_rate": 1.3263154398124807e-05, "loss": 0.7815, "step": 2694 }, { "epoch": 0.41, "grad_norm": 2.539856934636554, "learning_rate": 1.325846782909198e-05, "loss": 0.8458, "step": 2695 }, { "epoch": 0.41, "grad_norm": 2.9120051675086893, "learning_rate": 1.3253780459219143e-05, "loss": 0.9523, "step": 2696 }, { "epoch": 0.41, "grad_norm": 2.6221844576287014, "learning_rate": 1.3249092289658327e-05, "loss": 0.8075, "step": 2697 }, { "epoch": 0.41, "grad_norm": 3.1776531212135213, "learning_rate": 1.324440332156175e-05, "loss": 0.8592, "step": 2698 }, { "epoch": 0.41, "grad_norm": 2.8181917492879025, "learning_rate": 1.323971355608183e-05, "loss": 0.9806, "step": 2699 }, { "epoch": 0.41, "grad_norm": 2.916697538337927, "learning_rate": 1.323502299437118e-05, "loss": 0.9191, "step": 2700 }, { "epoch": 0.41, "grad_norm": 2.682544239590553, "learning_rate": 1.3230331637582605e-05, "loss": 0.9452, "step": 2701 }, { "epoch": 0.41, "grad_norm": 2.803323300095509, "learning_rate": 1.3225639486869113e-05, "loss": 0.8157, "step": 2702 }, { "epoch": 0.41, "grad_norm": 2.7628897421404925, "learning_rate": 1.3220946543383904e-05, "loss": 0.8419, "step": 2703 }, { "epoch": 0.41, "grad_norm": 2.9899136183471016, "learning_rate": 1.3216252808280366e-05, "loss": 0.9206, "step": 2704 }, { "epoch": 0.41, "grad_norm": 2.9304013279937533, "learning_rate": 1.3211558282712092e-05, "loss": 0.8583, "step": 2705 }, { "epoch": 0.41, "grad_norm": 3.0558376279085238, "learning_rate": 1.3206862967832863e-05, "loss": 0.9753, "step": 2706 }, { "epoch": 0.41, "grad_norm": 7.049415755795687, "learning_rate": 1.3202166864796659e-05, "loss": 1.0676, "step": 2707 }, { "epoch": 0.41, "grad_norm": 2.903397325916613, "learning_rate": 1.3197469974757644e-05, "loss": 0.8897, "step": 2708 }, { "epoch": 0.41, "grad_norm": 2.927310383815381, "learning_rate": 1.3192772298870187e-05, "loss": 0.8699, "step": 2709 }, { "epoch": 0.41, "grad_norm": 2.7231277330666335, "learning_rate": 1.3188073838288844e-05, "loss": 0.7612, "step": 2710 }, { "epoch": 0.41, "grad_norm": 2.9162454164657357, "learning_rate": 1.3183374594168368e-05, "loss": 1.0089, "step": 2711 }, { "epoch": 0.42, "grad_norm": 2.759293862016098, "learning_rate": 1.3178674567663692e-05, "loss": 0.9146, "step": 2712 }, { "epoch": 0.42, "grad_norm": 2.873108983408784, "learning_rate": 1.3173973759929956e-05, "loss": 0.8961, "step": 2713 }, { "epoch": 0.42, "grad_norm": 2.900800727457516, "learning_rate": 1.3169272172122493e-05, "loss": 0.8252, "step": 2714 }, { "epoch": 0.42, "grad_norm": 2.671252646096681, "learning_rate": 1.3164569805396812e-05, "loss": 0.9027, "step": 2715 }, { "epoch": 0.42, "grad_norm": 2.4209487105746272, "learning_rate": 1.3159866660908629e-05, "loss": 0.818, "step": 2716 }, { "epoch": 0.42, "grad_norm": 2.596162460313096, "learning_rate": 1.315516273981384e-05, "loss": 0.8533, "step": 2717 }, { "epoch": 0.42, "grad_norm": 2.8102198032524477, "learning_rate": 1.3150458043268541e-05, "loss": 0.9579, "step": 2718 }, { "epoch": 0.42, "grad_norm": 2.7144446997601652, "learning_rate": 1.3145752572429012e-05, "loss": 0.9368, "step": 2719 }, { "epoch": 0.42, "grad_norm": 2.9364685528235817, "learning_rate": 1.3141046328451724e-05, "loss": 0.8868, "step": 2720 }, { "epoch": 0.42, "grad_norm": 4.064401225713515, "learning_rate": 1.3136339312493346e-05, "loss": 0.9467, "step": 2721 }, { "epoch": 0.42, "grad_norm": 2.5323842705217143, "learning_rate": 1.3131631525710729e-05, "loss": 0.822, "step": 2722 }, { "epoch": 0.42, "grad_norm": 2.936538224523595, "learning_rate": 1.3126922969260908e-05, "loss": 0.8762, "step": 2723 }, { "epoch": 0.42, "grad_norm": 2.640141372170476, "learning_rate": 1.312221364430112e-05, "loss": 0.9057, "step": 2724 }, { "epoch": 0.42, "grad_norm": 2.9851422679608923, "learning_rate": 1.3117503551988786e-05, "loss": 0.9964, "step": 2725 }, { "epoch": 0.42, "grad_norm": 2.729257381358218, "learning_rate": 1.311279269348151e-05, "loss": 0.905, "step": 2726 }, { "epoch": 0.42, "grad_norm": 2.8125014739544567, "learning_rate": 1.3108081069937095e-05, "loss": 0.8715, "step": 2727 }, { "epoch": 0.42, "grad_norm": 3.0387568343577667, "learning_rate": 1.3103368682513519e-05, "loss": 0.8337, "step": 2728 }, { "epoch": 0.42, "grad_norm": 3.0422667008518354, "learning_rate": 1.309865553236896e-05, "loss": 0.8908, "step": 2729 }, { "epoch": 0.42, "grad_norm": 2.685792667296222, "learning_rate": 1.3093941620661777e-05, "loss": 0.8665, "step": 2730 }, { "epoch": 0.42, "grad_norm": 2.864671960381527, "learning_rate": 1.3089226948550513e-05, "loss": 0.8474, "step": 2731 }, { "epoch": 0.42, "grad_norm": 2.6702622460485546, "learning_rate": 1.3084511517193908e-05, "loss": 0.8985, "step": 2732 }, { "epoch": 0.42, "grad_norm": 2.943472642915905, "learning_rate": 1.3079795327750882e-05, "loss": 0.8237, "step": 2733 }, { "epoch": 0.42, "grad_norm": 2.80814879490897, "learning_rate": 1.307507838138054e-05, "loss": 0.9702, "step": 2734 }, { "epoch": 0.42, "grad_norm": 2.763516539187601, "learning_rate": 1.3070360679242171e-05, "loss": 0.9229, "step": 2735 }, { "epoch": 0.42, "grad_norm": 2.513463001939668, "learning_rate": 1.3065642222495263e-05, "loss": 0.8847, "step": 2736 }, { "epoch": 0.42, "grad_norm": 2.6352679845951053, "learning_rate": 1.3060923012299474e-05, "loss": 0.808, "step": 2737 }, { "epoch": 0.42, "grad_norm": 2.6534091049974515, "learning_rate": 1.3056203049814657e-05, "loss": 0.8816, "step": 2738 }, { "epoch": 0.42, "grad_norm": 2.6125283309661, "learning_rate": 1.3051482336200844e-05, "loss": 0.8314, "step": 2739 }, { "epoch": 0.42, "grad_norm": 2.5198187375176504, "learning_rate": 1.3046760872618256e-05, "loss": 0.8629, "step": 2740 }, { "epoch": 0.42, "grad_norm": 3.314150472453498, "learning_rate": 1.3042038660227294e-05, "loss": 0.9143, "step": 2741 }, { "epoch": 0.42, "grad_norm": 2.682097793806096, "learning_rate": 1.3037315700188545e-05, "loss": 0.8695, "step": 2742 }, { "epoch": 0.42, "grad_norm": 2.691789862579373, "learning_rate": 1.3032591993662782e-05, "loss": 0.9287, "step": 2743 }, { "epoch": 0.42, "grad_norm": 2.7938866440413697, "learning_rate": 1.3027867541810961e-05, "loss": 0.9067, "step": 2744 }, { "epoch": 0.42, "grad_norm": 2.806362708709313, "learning_rate": 1.3023142345794217e-05, "loss": 0.925, "step": 2745 }, { "epoch": 0.42, "grad_norm": 2.8228075732201483, "learning_rate": 1.3018416406773872e-05, "loss": 0.9025, "step": 2746 }, { "epoch": 0.42, "grad_norm": 2.6855012662957063, "learning_rate": 1.3013689725911429e-05, "loss": 0.8737, "step": 2747 }, { "epoch": 0.42, "grad_norm": 2.816680124122433, "learning_rate": 1.3008962304368574e-05, "loss": 0.9318, "step": 2748 }, { "epoch": 0.42, "grad_norm": 2.7972551434857094, "learning_rate": 1.3004234143307173e-05, "loss": 1.0027, "step": 2749 }, { "epoch": 0.42, "grad_norm": 2.8968584915515048, "learning_rate": 1.2999505243889276e-05, "loss": 0.8936, "step": 2750 }, { "epoch": 0.42, "grad_norm": 2.8214777133442754, "learning_rate": 1.2994775607277117e-05, "loss": 0.8905, "step": 2751 }, { "epoch": 0.42, "grad_norm": 2.7101579231098265, "learning_rate": 1.2990045234633103e-05, "loss": 0.9263, "step": 2752 }, { "epoch": 0.42, "grad_norm": 2.677902149989637, "learning_rate": 1.2985314127119827e-05, "loss": 0.8503, "step": 2753 }, { "epoch": 0.42, "grad_norm": 2.7699643153619378, "learning_rate": 1.2980582285900067e-05, "loss": 0.9604, "step": 2754 }, { "epoch": 0.42, "grad_norm": 2.9787744770673408, "learning_rate": 1.2975849712136777e-05, "loss": 0.9063, "step": 2755 }, { "epoch": 0.42, "grad_norm": 2.6750675394118044, "learning_rate": 1.2971116406993087e-05, "loss": 1.0037, "step": 2756 }, { "epoch": 0.42, "grad_norm": 2.76107592986856, "learning_rate": 1.2966382371632312e-05, "loss": 0.8496, "step": 2757 }, { "epoch": 0.42, "grad_norm": 2.810847329041354, "learning_rate": 1.2961647607217947e-05, "loss": 0.9515, "step": 2758 }, { "epoch": 0.42, "grad_norm": 2.5966067731708664, "learning_rate": 1.295691211491366e-05, "loss": 0.8798, "step": 2759 }, { "epoch": 0.42, "grad_norm": 2.62491735299009, "learning_rate": 1.2952175895883305e-05, "loss": 0.8362, "step": 2760 }, { "epoch": 0.42, "grad_norm": 2.739999232523196, "learning_rate": 1.2947438951290914e-05, "loss": 0.8861, "step": 2761 }, { "epoch": 0.42, "grad_norm": 2.794841678738877, "learning_rate": 1.2942701282300694e-05, "loss": 0.8742, "step": 2762 }, { "epoch": 0.42, "grad_norm": 2.5384183680272097, "learning_rate": 1.2937962890077025e-05, "loss": 0.8167, "step": 2763 }, { "epoch": 0.42, "grad_norm": 2.618528137650998, "learning_rate": 1.2933223775784476e-05, "loss": 0.9138, "step": 2764 }, { "epoch": 0.42, "grad_norm": 2.9500922611185936, "learning_rate": 1.292848394058779e-05, "loss": 0.9018, "step": 2765 }, { "epoch": 0.42, "grad_norm": 2.842181916751234, "learning_rate": 1.2923743385651886e-05, "loss": 0.9543, "step": 2766 }, { "epoch": 0.42, "grad_norm": 3.0128918616195084, "learning_rate": 1.2919002112141856e-05, "loss": 1.0471, "step": 2767 }, { "epoch": 0.42, "grad_norm": 2.561823446799556, "learning_rate": 1.2914260121222973e-05, "loss": 0.9366, "step": 2768 }, { "epoch": 0.42, "grad_norm": 2.70083738483455, "learning_rate": 1.2909517414060686e-05, "loss": 0.9575, "step": 2769 }, { "epoch": 0.42, "grad_norm": 2.6430987035979, "learning_rate": 1.2904773991820619e-05, "loss": 0.9195, "step": 2770 }, { "epoch": 0.42, "grad_norm": 2.628729956718192, "learning_rate": 1.290002985566857e-05, "loss": 0.8752, "step": 2771 }, { "epoch": 0.42, "grad_norm": 5.1639188996877206, "learning_rate": 1.2895285006770521e-05, "loss": 1.1261, "step": 2772 }, { "epoch": 0.42, "grad_norm": 2.704694091407873, "learning_rate": 1.2890539446292617e-05, "loss": 0.8353, "step": 2773 }, { "epoch": 0.42, "grad_norm": 2.7133213384391324, "learning_rate": 1.2885793175401184e-05, "loss": 0.8487, "step": 2774 }, { "epoch": 0.42, "grad_norm": 2.766965649726584, "learning_rate": 1.2881046195262722e-05, "loss": 0.8796, "step": 2775 }, { "epoch": 0.42, "grad_norm": 2.666040307272094, "learning_rate": 1.2876298507043909e-05, "loss": 0.9043, "step": 2776 }, { "epoch": 0.43, "grad_norm": 2.6977785742953646, "learning_rate": 1.2871550111911594e-05, "loss": 0.9711, "step": 2777 }, { "epoch": 0.43, "grad_norm": 2.7691161056205362, "learning_rate": 1.2866801011032797e-05, "loss": 0.9293, "step": 2778 }, { "epoch": 0.43, "grad_norm": 2.7409823091121863, "learning_rate": 1.2862051205574711e-05, "loss": 0.9393, "step": 2779 }, { "epoch": 0.43, "grad_norm": 2.729972015716603, "learning_rate": 1.2857300696704709e-05, "loss": 0.7922, "step": 2780 }, { "epoch": 0.43, "grad_norm": 2.6327159311383377, "learning_rate": 1.2852549485590333e-05, "loss": 0.8727, "step": 2781 }, { "epoch": 0.43, "grad_norm": 2.780302599242152, "learning_rate": 1.2847797573399295e-05, "loss": 0.995, "step": 2782 }, { "epoch": 0.43, "grad_norm": 2.8037074207099093, "learning_rate": 1.2843044961299485e-05, "loss": 0.9171, "step": 2783 }, { "epoch": 0.43, "grad_norm": 2.6445660114893594, "learning_rate": 1.2838291650458958e-05, "loss": 0.925, "step": 2784 }, { "epoch": 0.43, "grad_norm": 2.837391869572842, "learning_rate": 1.2833537642045946e-05, "loss": 0.9195, "step": 2785 }, { "epoch": 0.43, "grad_norm": 2.824701378485826, "learning_rate": 1.282878293722885e-05, "loss": 0.8747, "step": 2786 }, { "epoch": 0.43, "grad_norm": 2.9369630126925377, "learning_rate": 1.2824027537176245e-05, "loss": 0.8264, "step": 2787 }, { "epoch": 0.43, "grad_norm": 2.979787998965054, "learning_rate": 1.2819271443056875e-05, "loss": 0.8666, "step": 2788 }, { "epoch": 0.43, "grad_norm": 2.7097297050328284, "learning_rate": 1.2814514656039654e-05, "loss": 0.8764, "step": 2789 }, { "epoch": 0.43, "grad_norm": 2.7239968607272536, "learning_rate": 1.2809757177293665e-05, "loss": 0.8569, "step": 2790 }, { "epoch": 0.43, "grad_norm": 5.739216278795826, "learning_rate": 1.2804999007988164e-05, "loss": 1.076, "step": 2791 }, { "epoch": 0.43, "grad_norm": 2.78097932188797, "learning_rate": 1.280024014929258e-05, "loss": 0.9201, "step": 2792 }, { "epoch": 0.43, "grad_norm": 2.7816801537901346, "learning_rate": 1.2795480602376498e-05, "loss": 0.8948, "step": 2793 }, { "epoch": 0.43, "grad_norm": 2.8022224490668406, "learning_rate": 1.279072036840969e-05, "loss": 0.9382, "step": 2794 }, { "epoch": 0.43, "grad_norm": 3.0675784329509876, "learning_rate": 1.2785959448562085e-05, "loss": 0.969, "step": 2795 }, { "epoch": 0.43, "grad_norm": 2.6091188213405028, "learning_rate": 1.2781197844003779e-05, "loss": 0.7114, "step": 2796 }, { "epoch": 0.43, "grad_norm": 2.9346912134531844, "learning_rate": 1.2776435555905044e-05, "loss": 0.9375, "step": 2797 }, { "epoch": 0.43, "grad_norm": 2.720573376646091, "learning_rate": 1.2771672585436319e-05, "loss": 0.9549, "step": 2798 }, { "epoch": 0.43, "grad_norm": 2.816531795887586, "learning_rate": 1.2766908933768208e-05, "loss": 0.8821, "step": 2799 }, { "epoch": 0.43, "grad_norm": 2.8434065002551825, "learning_rate": 1.276214460207148e-05, "loss": 0.9636, "step": 2800 }, { "epoch": 0.43, "grad_norm": 2.513410606594475, "learning_rate": 1.2757379591517078e-05, "loss": 0.8945, "step": 2801 }, { "epoch": 0.43, "grad_norm": 2.572942288837356, "learning_rate": 1.2752613903276105e-05, "loss": 0.8388, "step": 2802 }, { "epoch": 0.43, "grad_norm": 2.3877301322816327, "learning_rate": 1.2747847538519835e-05, "loss": 0.8968, "step": 2803 }, { "epoch": 0.43, "grad_norm": 2.8973856675083773, "learning_rate": 1.2743080498419706e-05, "loss": 0.8581, "step": 2804 }, { "epoch": 0.43, "grad_norm": 2.6479081480567146, "learning_rate": 1.2738312784147321e-05, "loss": 0.9422, "step": 2805 }, { "epoch": 0.43, "grad_norm": 2.658536179740867, "learning_rate": 1.2733544396874458e-05, "loss": 0.9756, "step": 2806 }, { "epoch": 0.43, "grad_norm": 2.7879058284524008, "learning_rate": 1.2728775337773045e-05, "loss": 0.7959, "step": 2807 }, { "epoch": 0.43, "grad_norm": 2.56131826463935, "learning_rate": 1.2724005608015182e-05, "loss": 0.9383, "step": 2808 }, { "epoch": 0.43, "grad_norm": 2.7028874499026885, "learning_rate": 1.271923520877314e-05, "loss": 0.9949, "step": 2809 }, { "epoch": 0.43, "grad_norm": 2.6867758439522373, "learning_rate": 1.2714464141219349e-05, "loss": 0.9218, "step": 2810 }, { "epoch": 0.43, "grad_norm": 2.52131520434168, "learning_rate": 1.2709692406526402e-05, "loss": 0.8991, "step": 2811 }, { "epoch": 0.43, "grad_norm": 2.6569271969175845, "learning_rate": 1.2704920005867056e-05, "loss": 0.9066, "step": 2812 }, { "epoch": 0.43, "grad_norm": 2.8660174545559434, "learning_rate": 1.2700146940414235e-05, "loss": 0.992, "step": 2813 }, { "epoch": 0.43, "grad_norm": 2.8443981988384084, "learning_rate": 1.2695373211341027e-05, "loss": 0.9334, "step": 2814 }, { "epoch": 0.43, "grad_norm": 2.8875230537426404, "learning_rate": 1.2690598819820673e-05, "loss": 0.9484, "step": 2815 }, { "epoch": 0.43, "grad_norm": 2.4775901913816125, "learning_rate": 1.2685823767026595e-05, "loss": 0.891, "step": 2816 }, { "epoch": 0.43, "grad_norm": 2.7206299402662686, "learning_rate": 1.2681048054132362e-05, "loss": 0.9514, "step": 2817 }, { "epoch": 0.43, "grad_norm": 3.167692193895392, "learning_rate": 1.2676271682311707e-05, "loss": 0.8848, "step": 2818 }, { "epoch": 0.43, "grad_norm": 5.190369872857878, "learning_rate": 1.2671494652738532e-05, "loss": 0.9674, "step": 2819 }, { "epoch": 0.43, "grad_norm": 2.7075328177894096, "learning_rate": 1.2666716966586897e-05, "loss": 0.9712, "step": 2820 }, { "epoch": 0.43, "grad_norm": 2.624690499566204, "learning_rate": 1.2661938625031023e-05, "loss": 0.8567, "step": 2821 }, { "epoch": 0.43, "grad_norm": 2.8968353468717307, "learning_rate": 1.2657159629245289e-05, "loss": 0.8855, "step": 2822 }, { "epoch": 0.43, "grad_norm": 2.654379274046832, "learning_rate": 1.2652379980404243e-05, "loss": 0.864, "step": 2823 }, { "epoch": 0.43, "grad_norm": 2.6926644329777125, "learning_rate": 1.2647599679682587e-05, "loss": 0.8662, "step": 2824 }, { "epoch": 0.43, "grad_norm": 3.082786408851719, "learning_rate": 1.2642818728255187e-05, "loss": 0.8975, "step": 2825 }, { "epoch": 0.43, "grad_norm": 2.738555091909095, "learning_rate": 1.2638037127297057e-05, "loss": 0.846, "step": 2826 }, { "epoch": 0.43, "grad_norm": 2.674779193194505, "learning_rate": 1.2633254877983391e-05, "loss": 0.8139, "step": 2827 }, { "epoch": 0.43, "grad_norm": 2.6945356029820298, "learning_rate": 1.2628471981489531e-05, "loss": 1.0136, "step": 2828 }, { "epoch": 0.43, "grad_norm": 2.752427401318561, "learning_rate": 1.2623688438990977e-05, "loss": 0.8937, "step": 2829 }, { "epoch": 0.43, "grad_norm": 2.520523775277756, "learning_rate": 1.2618904251663383e-05, "loss": 0.8781, "step": 2830 }, { "epoch": 0.43, "grad_norm": 2.7637733636446513, "learning_rate": 1.2614119420682578e-05, "loss": 0.8954, "step": 2831 }, { "epoch": 0.43, "grad_norm": 2.618376381488524, "learning_rate": 1.2609333947224536e-05, "loss": 0.8622, "step": 2832 }, { "epoch": 0.43, "grad_norm": 2.6964185788176973, "learning_rate": 1.2604547832465388e-05, "loss": 0.9027, "step": 2833 }, { "epoch": 0.43, "grad_norm": 2.4961769332020256, "learning_rate": 1.2599761077581432e-05, "loss": 0.896, "step": 2834 }, { "epoch": 0.43, "grad_norm": 3.1354249286666676, "learning_rate": 1.2594973683749117e-05, "loss": 0.917, "step": 2835 }, { "epoch": 0.43, "grad_norm": 2.499449814627182, "learning_rate": 1.2590185652145048e-05, "loss": 0.876, "step": 2836 }, { "epoch": 0.43, "grad_norm": 2.6758994644461054, "learning_rate": 1.258539698394599e-05, "loss": 0.9023, "step": 2837 }, { "epoch": 0.43, "grad_norm": 2.721132926239213, "learning_rate": 1.2580607680328862e-05, "loss": 0.8959, "step": 2838 }, { "epoch": 0.43, "grad_norm": 3.012454865319753, "learning_rate": 1.2575817742470744e-05, "loss": 0.856, "step": 2839 }, { "epoch": 0.43, "grad_norm": 2.704714719021651, "learning_rate": 1.2571027171548869e-05, "loss": 0.8977, "step": 2840 }, { "epoch": 0.43, "grad_norm": 2.84919928232391, "learning_rate": 1.2566235968740617e-05, "loss": 0.9952, "step": 2841 }, { "epoch": 0.44, "grad_norm": 2.6574199172892525, "learning_rate": 1.256144413522354e-05, "loss": 0.9264, "step": 2842 }, { "epoch": 0.44, "grad_norm": 2.614026383064716, "learning_rate": 1.2556651672175334e-05, "loss": 0.814, "step": 2843 }, { "epoch": 0.44, "grad_norm": 2.701109692270557, "learning_rate": 1.2551858580773849e-05, "loss": 0.9096, "step": 2844 }, { "epoch": 0.44, "grad_norm": 3.238141437418869, "learning_rate": 1.2547064862197094e-05, "loss": 0.9762, "step": 2845 }, { "epoch": 0.44, "grad_norm": 2.6739228366592105, "learning_rate": 1.254227051762323e-05, "loss": 0.8861, "step": 2846 }, { "epoch": 0.44, "grad_norm": 2.939568840529801, "learning_rate": 1.2537475548230576e-05, "loss": 0.897, "step": 2847 }, { "epoch": 0.44, "grad_norm": 6.158877445042974, "learning_rate": 1.2532679955197598e-05, "loss": 1.1339, "step": 2848 }, { "epoch": 0.44, "grad_norm": 2.7311145656490585, "learning_rate": 1.2527883739702915e-05, "loss": 0.7805, "step": 2849 }, { "epoch": 0.44, "grad_norm": 2.887246109250409, "learning_rate": 1.252308690292531e-05, "loss": 0.9028, "step": 2850 }, { "epoch": 0.44, "grad_norm": 3.1492961703914863, "learning_rate": 1.2518289446043708e-05, "loss": 0.8904, "step": 2851 }, { "epoch": 0.44, "grad_norm": 2.86800072338607, "learning_rate": 1.2513491370237185e-05, "loss": 0.9, "step": 2852 }, { "epoch": 0.44, "grad_norm": 3.0258328334393756, "learning_rate": 1.2508692676684976e-05, "loss": 0.9429, "step": 2853 }, { "epoch": 0.44, "grad_norm": 2.7665456325819693, "learning_rate": 1.250389336656647e-05, "loss": 0.9439, "step": 2854 }, { "epoch": 0.44, "grad_norm": 2.5646168892460377, "learning_rate": 1.2499093441061197e-05, "loss": 0.9178, "step": 2855 }, { "epoch": 0.44, "grad_norm": 2.6287532728333125, "learning_rate": 1.2494292901348843e-05, "loss": 0.8059, "step": 2856 }, { "epoch": 0.44, "grad_norm": 2.958320890926117, "learning_rate": 1.2489491748609252e-05, "loss": 0.8785, "step": 2857 }, { "epoch": 0.44, "grad_norm": 3.2348670122620264, "learning_rate": 1.2484689984022411e-05, "loss": 0.8628, "step": 2858 }, { "epoch": 0.44, "grad_norm": 2.980007281557108, "learning_rate": 1.2479887608768456e-05, "loss": 0.9237, "step": 2859 }, { "epoch": 0.44, "grad_norm": 3.1141166290087186, "learning_rate": 1.2475084624027676e-05, "loss": 0.9854, "step": 2860 }, { "epoch": 0.44, "grad_norm": 2.6165244904164173, "learning_rate": 1.2470281030980514e-05, "loss": 0.8273, "step": 2861 }, { "epoch": 0.44, "grad_norm": 2.6790421711796113, "learning_rate": 1.2465476830807554e-05, "loss": 0.8995, "step": 2862 }, { "epoch": 0.44, "grad_norm": 2.6940258268094537, "learning_rate": 1.246067202468954e-05, "loss": 0.7782, "step": 2863 }, { "epoch": 0.44, "grad_norm": 2.7195876883706096, "learning_rate": 1.245586661380735e-05, "loss": 0.9042, "step": 2864 }, { "epoch": 0.44, "grad_norm": 2.6665709592021134, "learning_rate": 1.2451060599342027e-05, "loss": 0.8503, "step": 2865 }, { "epoch": 0.44, "grad_norm": 2.780535821992812, "learning_rate": 1.244625398247475e-05, "loss": 0.8841, "step": 2866 }, { "epoch": 0.44, "grad_norm": 2.3861406054156697, "learning_rate": 1.2441446764386852e-05, "loss": 0.8027, "step": 2867 }, { "epoch": 0.44, "grad_norm": 3.1004373602009925, "learning_rate": 1.2436638946259812e-05, "loss": 0.8828, "step": 2868 }, { "epoch": 0.44, "grad_norm": 2.6726937897135388, "learning_rate": 1.2431830529275258e-05, "loss": 0.8096, "step": 2869 }, { "epoch": 0.44, "grad_norm": 2.699417394780327, "learning_rate": 1.242702151461496e-05, "loss": 0.8516, "step": 2870 }, { "epoch": 0.44, "grad_norm": 2.8101062223473368, "learning_rate": 1.2422211903460845e-05, "loss": 0.9078, "step": 2871 }, { "epoch": 0.44, "grad_norm": 6.152195380073992, "learning_rate": 1.2417401696994976e-05, "loss": 1.0162, "step": 2872 }, { "epoch": 0.44, "grad_norm": 2.8336300027831007, "learning_rate": 1.241259089639957e-05, "loss": 0.799, "step": 2873 }, { "epoch": 0.44, "grad_norm": 3.050238017683165, "learning_rate": 1.2407779502856987e-05, "loss": 0.8961, "step": 2874 }, { "epoch": 0.44, "grad_norm": 2.808485721043457, "learning_rate": 1.2402967517549727e-05, "loss": 0.8932, "step": 2875 }, { "epoch": 0.44, "grad_norm": 2.7191297854715617, "learning_rate": 1.2398154941660444e-05, "loss": 0.9282, "step": 2876 }, { "epoch": 0.44, "grad_norm": 2.724204028606329, "learning_rate": 1.2393341776371938e-05, "loss": 0.9891, "step": 2877 }, { "epoch": 0.44, "grad_norm": 2.7601559575612145, "learning_rate": 1.2388528022867149e-05, "loss": 0.8699, "step": 2878 }, { "epoch": 0.44, "grad_norm": 3.000915940449102, "learning_rate": 1.238371368232916e-05, "loss": 0.8998, "step": 2879 }, { "epoch": 0.44, "grad_norm": 4.492961193488793, "learning_rate": 1.23788987559412e-05, "loss": 0.8853, "step": 2880 }, { "epoch": 0.44, "grad_norm": 2.8430538524255873, "learning_rate": 1.2374083244886643e-05, "loss": 0.9793, "step": 2881 }, { "epoch": 0.44, "grad_norm": 2.6505794293919682, "learning_rate": 1.2369267150349009e-05, "loss": 0.9107, "step": 2882 }, { "epoch": 0.44, "grad_norm": 2.7726946812456537, "learning_rate": 1.2364450473511958e-05, "loss": 0.8269, "step": 2883 }, { "epoch": 0.44, "grad_norm": 2.5356256975086793, "learning_rate": 1.2359633215559297e-05, "loss": 0.856, "step": 2884 }, { "epoch": 0.44, "grad_norm": 2.705532891404338, "learning_rate": 1.235481537767497e-05, "loss": 0.8162, "step": 2885 }, { "epoch": 0.44, "grad_norm": 2.5881663782552518, "learning_rate": 1.2349996961043063e-05, "loss": 0.8658, "step": 2886 }, { "epoch": 0.44, "grad_norm": 2.7240151945541533, "learning_rate": 1.234517796684781e-05, "loss": 0.8341, "step": 2887 }, { "epoch": 0.44, "grad_norm": 2.8736602833296305, "learning_rate": 1.234035839627359e-05, "loss": 0.8701, "step": 2888 }, { "epoch": 0.44, "grad_norm": 2.496571636405041, "learning_rate": 1.233553825050491e-05, "loss": 0.8708, "step": 2889 }, { "epoch": 0.44, "grad_norm": 2.7426844394459926, "learning_rate": 1.2330717530726435e-05, "loss": 0.8606, "step": 2890 }, { "epoch": 0.44, "grad_norm": 20.411245162121194, "learning_rate": 1.2325896238122958e-05, "loss": 0.9352, "step": 2891 }, { "epoch": 0.44, "grad_norm": 3.088228446918177, "learning_rate": 1.2321074373879416e-05, "loss": 0.8612, "step": 2892 }, { "epoch": 0.44, "grad_norm": 8.92235944442913, "learning_rate": 1.2316251939180888e-05, "loss": 0.8304, "step": 2893 }, { "epoch": 0.44, "grad_norm": 2.9263552390861607, "learning_rate": 1.2311428935212598e-05, "loss": 0.7888, "step": 2894 }, { "epoch": 0.44, "grad_norm": 2.9573729539265527, "learning_rate": 1.23066053631599e-05, "loss": 0.881, "step": 2895 }, { "epoch": 0.44, "grad_norm": 2.592117602341213, "learning_rate": 1.2301781224208297e-05, "loss": 0.8186, "step": 2896 }, { "epoch": 0.44, "grad_norm": 2.804394996969601, "learning_rate": 1.2296956519543424e-05, "loss": 0.9044, "step": 2897 }, { "epoch": 0.44, "grad_norm": 2.6173110797940975, "learning_rate": 1.2292131250351059e-05, "loss": 0.8435, "step": 2898 }, { "epoch": 0.44, "grad_norm": 2.744848357135238, "learning_rate": 1.228730541781712e-05, "loss": 0.8331, "step": 2899 }, { "epoch": 0.44, "grad_norm": 2.8600823953486634, "learning_rate": 1.2282479023127656e-05, "loss": 0.9479, "step": 2900 }, { "epoch": 0.44, "grad_norm": 3.128079937009162, "learning_rate": 1.2277652067468864e-05, "loss": 0.881, "step": 2901 }, { "epoch": 0.44, "grad_norm": 2.6693888567881565, "learning_rate": 1.2272824552027072e-05, "loss": 0.8039, "step": 2902 }, { "epoch": 0.44, "grad_norm": 2.9620619172529166, "learning_rate": 1.226799647798875e-05, "loss": 0.8716, "step": 2903 }, { "epoch": 0.44, "grad_norm": 2.7313416619549824, "learning_rate": 1.2263167846540502e-05, "loss": 0.9502, "step": 2904 }, { "epoch": 0.44, "grad_norm": 5.497128092116304, "learning_rate": 1.2258338658869069e-05, "loss": 1.0453, "step": 2905 }, { "epoch": 0.44, "grad_norm": 2.5403130711331765, "learning_rate": 1.2253508916161331e-05, "loss": 0.792, "step": 2906 }, { "epoch": 0.44, "grad_norm": 3.122401354526183, "learning_rate": 1.2248678619604308e-05, "loss": 0.8809, "step": 2907 }, { "epoch": 0.45, "grad_norm": 6.86419136824606, "learning_rate": 1.2243847770385142e-05, "loss": 1.0589, "step": 2908 }, { "epoch": 0.45, "grad_norm": 2.8875754346845306, "learning_rate": 1.2239016369691126e-05, "loss": 0.835, "step": 2909 }, { "epoch": 0.45, "grad_norm": 3.028081918992309, "learning_rate": 1.2234184418709685e-05, "loss": 0.8556, "step": 2910 }, { "epoch": 0.45, "grad_norm": 2.6065156965874188, "learning_rate": 1.222935191862837e-05, "loss": 0.9404, "step": 2911 }, { "epoch": 0.45, "grad_norm": 3.1134873620451864, "learning_rate": 1.2224518870634879e-05, "loss": 0.8696, "step": 2912 }, { "epoch": 0.45, "grad_norm": 2.5175705354858375, "learning_rate": 1.2219685275917039e-05, "loss": 0.8818, "step": 2913 }, { "epoch": 0.45, "grad_norm": 2.810887240705469, "learning_rate": 1.2214851135662813e-05, "loss": 0.8863, "step": 2914 }, { "epoch": 0.45, "grad_norm": 2.7008514488137108, "learning_rate": 1.2210016451060291e-05, "loss": 0.8713, "step": 2915 }, { "epoch": 0.45, "grad_norm": 2.9587433703312325, "learning_rate": 1.2205181223297712e-05, "loss": 0.9416, "step": 2916 }, { "epoch": 0.45, "grad_norm": 5.448777441858399, "learning_rate": 1.2200345453563433e-05, "loss": 1.0207, "step": 2917 }, { "epoch": 0.45, "grad_norm": 2.714595830544037, "learning_rate": 1.2195509143045953e-05, "loss": 0.8795, "step": 2918 }, { "epoch": 0.45, "grad_norm": 2.9810252209169565, "learning_rate": 1.2190672292933902e-05, "loss": 0.9086, "step": 2919 }, { "epoch": 0.45, "grad_norm": 2.750782059673263, "learning_rate": 1.218583490441604e-05, "loss": 0.9838, "step": 2920 }, { "epoch": 0.45, "grad_norm": 2.588070443866364, "learning_rate": 1.2180996978681262e-05, "loss": 0.8068, "step": 2921 }, { "epoch": 0.45, "grad_norm": 3.5146551808157622, "learning_rate": 1.2176158516918597e-05, "loss": 0.7791, "step": 2922 }, { "epoch": 0.45, "grad_norm": 2.9319161515472665, "learning_rate": 1.21713195203172e-05, "loss": 0.8309, "step": 2923 }, { "epoch": 0.45, "grad_norm": 2.6399633926053623, "learning_rate": 1.2166479990066362e-05, "loss": 0.7922, "step": 2924 }, { "epoch": 0.45, "grad_norm": 2.5698088581181113, "learning_rate": 1.2161639927355503e-05, "loss": 0.838, "step": 2925 }, { "epoch": 0.45, "grad_norm": 2.56267127959082, "learning_rate": 1.2156799333374176e-05, "loss": 0.9022, "step": 2926 }, { "epoch": 0.45, "grad_norm": 2.6103619596786913, "learning_rate": 1.2151958209312063e-05, "loss": 0.9504, "step": 2927 }, { "epoch": 0.45, "grad_norm": 2.754608561725593, "learning_rate": 1.2147116556358975e-05, "loss": 0.8826, "step": 2928 }, { "epoch": 0.45, "grad_norm": 3.05394639932344, "learning_rate": 1.2142274375704855e-05, "loss": 0.9252, "step": 2929 }, { "epoch": 0.45, "grad_norm": 2.758527341831582, "learning_rate": 1.2137431668539778e-05, "loss": 0.8559, "step": 2930 }, { "epoch": 0.45, "grad_norm": 2.650067159518404, "learning_rate": 1.2132588436053942e-05, "loss": 0.8278, "step": 2931 }, { "epoch": 0.45, "grad_norm": 3.2212624060518076, "learning_rate": 1.2127744679437681e-05, "loss": 0.795, "step": 2932 }, { "epoch": 0.45, "grad_norm": 2.568855356171706, "learning_rate": 1.212290039988145e-05, "loss": 0.8856, "step": 2933 }, { "epoch": 0.45, "grad_norm": 4.895450975084306, "learning_rate": 1.211805559857584e-05, "loss": 1.0471, "step": 2934 }, { "epoch": 0.45, "grad_norm": 2.602246628619692, "learning_rate": 1.211321027671157e-05, "loss": 0.8483, "step": 2935 }, { "epoch": 0.45, "grad_norm": 2.6483466494779972, "learning_rate": 1.2108364435479478e-05, "loss": 0.9137, "step": 2936 }, { "epoch": 0.45, "grad_norm": 2.708496704441042, "learning_rate": 1.210351807607054e-05, "loss": 0.9075, "step": 2937 }, { "epoch": 0.45, "grad_norm": 3.2501209914236693, "learning_rate": 1.2098671199675851e-05, "loss": 0.9656, "step": 2938 }, { "epoch": 0.45, "grad_norm": 2.7307672357696626, "learning_rate": 1.2093823807486645e-05, "loss": 0.8287, "step": 2939 }, { "epoch": 0.45, "grad_norm": 2.715745559891006, "learning_rate": 1.2088975900694269e-05, "loss": 0.7912, "step": 2940 }, { "epoch": 0.45, "grad_norm": 2.7187018995015113, "learning_rate": 1.2084127480490206e-05, "loss": 0.8247, "step": 2941 }, { "epoch": 0.45, "grad_norm": 2.5691527149286326, "learning_rate": 1.2079278548066058e-05, "loss": 0.8684, "step": 2942 }, { "epoch": 0.45, "grad_norm": 2.7598411907507274, "learning_rate": 1.2074429104613558e-05, "loss": 0.8464, "step": 2943 }, { "epoch": 0.45, "grad_norm": 5.124777456564711, "learning_rate": 1.2069579151324563e-05, "loss": 1.0324, "step": 2944 }, { "epoch": 0.45, "grad_norm": 2.553503614208449, "learning_rate": 1.2064728689391059e-05, "loss": 0.8478, "step": 2945 }, { "epoch": 0.45, "grad_norm": 5.142861541542434, "learning_rate": 1.2059877720005149e-05, "loss": 1.0479, "step": 2946 }, { "epoch": 0.45, "grad_norm": 2.7464581018539866, "learning_rate": 1.205502624435907e-05, "loss": 0.8466, "step": 2947 }, { "epoch": 0.45, "grad_norm": 2.437733125582104, "learning_rate": 1.2050174263645169e-05, "loss": 0.8697, "step": 2948 }, { "epoch": 0.45, "grad_norm": 4.590483344547154, "learning_rate": 1.2045321779055936e-05, "loss": 1.0495, "step": 2949 }, { "epoch": 0.45, "grad_norm": 4.379367692712861, "learning_rate": 1.2040468791783973e-05, "loss": 1.0306, "step": 2950 }, { "epoch": 0.45, "grad_norm": 3.3149611334682025, "learning_rate": 1.203561530302201e-05, "loss": 1.001, "step": 2951 }, { "epoch": 0.45, "grad_norm": 7.087327985322694, "learning_rate": 1.2030761313962898e-05, "loss": 1.0608, "step": 2952 }, { "epoch": 0.45, "grad_norm": 4.516713422731289, "learning_rate": 1.2025906825799604e-05, "loss": 0.9792, "step": 2953 }, { "epoch": 0.45, "grad_norm": 2.870450403349401, "learning_rate": 1.2021051839725235e-05, "loss": 0.8342, "step": 2954 }, { "epoch": 0.45, "grad_norm": 2.623934475340532, "learning_rate": 1.2016196356933005e-05, "loss": 0.8126, "step": 2955 }, { "epoch": 0.45, "grad_norm": 2.757354876528018, "learning_rate": 1.2011340378616256e-05, "loss": 0.9232, "step": 2956 }, { "epoch": 0.45, "grad_norm": 2.6140798539319494, "learning_rate": 1.2006483905968456e-05, "loss": 0.8552, "step": 2957 }, { "epoch": 0.45, "grad_norm": 2.6166843878359862, "learning_rate": 1.2001626940183185e-05, "loss": 0.926, "step": 2958 }, { "epoch": 0.45, "grad_norm": 2.6471979880923047, "learning_rate": 1.1996769482454144e-05, "loss": 0.8624, "step": 2959 }, { "epoch": 0.45, "grad_norm": 2.6230161949992663, "learning_rate": 1.1991911533975172e-05, "loss": 0.7624, "step": 2960 }, { "epoch": 0.45, "grad_norm": 2.7014344511734807, "learning_rate": 1.1987053095940204e-05, "loss": 0.86, "step": 2961 }, { "epoch": 0.45, "grad_norm": 2.649651028227363, "learning_rate": 1.198219416954332e-05, "loss": 0.9156, "step": 2962 }, { "epoch": 0.45, "grad_norm": 2.5928321617079844, "learning_rate": 1.19773347559787e-05, "loss": 0.8112, "step": 2963 }, { "epoch": 0.45, "grad_norm": 2.7488508110619754, "learning_rate": 1.1972474856440654e-05, "loss": 0.9098, "step": 2964 }, { "epoch": 0.45, "grad_norm": 2.7198204374543224, "learning_rate": 1.1967614472123607e-05, "loss": 0.8645, "step": 2965 }, { "epoch": 0.45, "grad_norm": 3.05265967983416, "learning_rate": 1.1962753604222108e-05, "loss": 0.9255, "step": 2966 }, { "epoch": 0.45, "grad_norm": 2.6759537172100645, "learning_rate": 1.1957892253930819e-05, "loss": 0.8436, "step": 2967 }, { "epoch": 0.45, "grad_norm": 3.2144844492572755, "learning_rate": 1.1953030422444526e-05, "loss": 0.914, "step": 2968 }, { "epoch": 0.45, "grad_norm": 3.0346285503049057, "learning_rate": 1.1948168110958132e-05, "loss": 0.9222, "step": 2969 }, { "epoch": 0.45, "grad_norm": 2.919974504371189, "learning_rate": 1.194330532066665e-05, "loss": 0.9076, "step": 2970 }, { "epoch": 0.45, "grad_norm": 2.916037722150218, "learning_rate": 1.1938442052765225e-05, "loss": 0.9252, "step": 2971 }, { "epoch": 0.45, "grad_norm": 3.0399673977308037, "learning_rate": 1.1933578308449108e-05, "loss": 0.7823, "step": 2972 }, { "epoch": 0.46, "grad_norm": 2.554756201579733, "learning_rate": 1.1928714088913673e-05, "loss": 0.828, "step": 2973 }, { "epoch": 0.46, "grad_norm": 2.7425865424645615, "learning_rate": 1.1923849395354407e-05, "loss": 0.8717, "step": 2974 }, { "epoch": 0.46, "grad_norm": 2.375571545693288, "learning_rate": 1.1918984228966917e-05, "loss": 0.8431, "step": 2975 }, { "epoch": 0.46, "grad_norm": 2.9360801702086463, "learning_rate": 1.1914118590946924e-05, "loss": 0.8792, "step": 2976 }, { "epoch": 0.46, "grad_norm": 2.9626264065606702, "learning_rate": 1.1909252482490263e-05, "loss": 0.8869, "step": 2977 }, { "epoch": 0.46, "grad_norm": 2.890042514095932, "learning_rate": 1.190438590479289e-05, "loss": 0.9294, "step": 2978 }, { "epoch": 0.46, "grad_norm": 2.981870606613395, "learning_rate": 1.1899518859050869e-05, "loss": 0.9017, "step": 2979 }, { "epoch": 0.46, "grad_norm": 2.715493009051848, "learning_rate": 1.1894651346460391e-05, "loss": 0.8221, "step": 2980 }, { "epoch": 0.46, "grad_norm": 10.855410286020899, "learning_rate": 1.188978336821775e-05, "loss": 1.0945, "step": 2981 }, { "epoch": 0.46, "grad_norm": 2.7232678577599536, "learning_rate": 1.1884914925519356e-05, "loss": 0.8186, "step": 2982 }, { "epoch": 0.46, "grad_norm": 2.482457923899972, "learning_rate": 1.1880046019561735e-05, "loss": 0.8432, "step": 2983 }, { "epoch": 0.46, "grad_norm": 2.639829647704218, "learning_rate": 1.1875176651541533e-05, "loss": 0.8063, "step": 2984 }, { "epoch": 0.46, "grad_norm": 3.107733287783824, "learning_rate": 1.1870306822655502e-05, "loss": 0.9191, "step": 2985 }, { "epoch": 0.46, "grad_norm": 2.7323130504400144, "learning_rate": 1.1865436534100508e-05, "loss": 0.9172, "step": 2986 }, { "epoch": 0.46, "grad_norm": 2.7044458896629044, "learning_rate": 1.186056578707353e-05, "loss": 0.9815, "step": 2987 }, { "epoch": 0.46, "grad_norm": 2.8592139872621245, "learning_rate": 1.1855694582771666e-05, "loss": 0.9378, "step": 2988 }, { "epoch": 0.46, "grad_norm": 2.565066517688624, "learning_rate": 1.1850822922392119e-05, "loss": 0.8113, "step": 2989 }, { "epoch": 0.46, "grad_norm": 2.6779479847097685, "learning_rate": 1.1845950807132203e-05, "loss": 0.8412, "step": 2990 }, { "epoch": 0.46, "grad_norm": 2.5545351126336584, "learning_rate": 1.1841078238189352e-05, "loss": 0.9046, "step": 2991 }, { "epoch": 0.46, "grad_norm": 2.6450426222819354, "learning_rate": 1.1836205216761105e-05, "loss": 0.8617, "step": 2992 }, { "epoch": 0.46, "grad_norm": 2.6275419597565746, "learning_rate": 1.1831331744045114e-05, "loss": 0.8616, "step": 2993 }, { "epoch": 0.46, "grad_norm": 2.7510452230302787, "learning_rate": 1.182645782123914e-05, "loss": 0.8931, "step": 2994 }, { "epoch": 0.46, "grad_norm": 2.6643225382375206, "learning_rate": 1.182158344954106e-05, "loss": 0.8337, "step": 2995 }, { "epoch": 0.46, "grad_norm": 2.618555830855707, "learning_rate": 1.1816708630148857e-05, "loss": 0.9202, "step": 2996 }, { "epoch": 0.46, "grad_norm": 2.8455305353008713, "learning_rate": 1.1811833364260625e-05, "loss": 0.9023, "step": 2997 }, { "epoch": 0.46, "grad_norm": 2.7928103126274375, "learning_rate": 1.1806957653074564e-05, "loss": 0.8542, "step": 2998 }, { "epoch": 0.46, "grad_norm": 2.7731698718633604, "learning_rate": 1.1802081497788993e-05, "loss": 0.7557, "step": 2999 }, { "epoch": 0.46, "grad_norm": 2.814943969739714, "learning_rate": 1.1797204899602328e-05, "loss": 0.8808, "step": 3000 }, { "epoch": 0.46, "grad_norm": 2.8355012116827134, "learning_rate": 1.1792327859713104e-05, "loss": 0.8385, "step": 3001 }, { "epoch": 0.46, "grad_norm": 3.1537111761470373, "learning_rate": 1.1787450379319963e-05, "loss": 0.8303, "step": 3002 }, { "epoch": 0.46, "grad_norm": 2.744178078536589, "learning_rate": 1.1782572459621646e-05, "loss": 0.9255, "step": 3003 }, { "epoch": 0.46, "grad_norm": 2.5395370642311623, "learning_rate": 1.1777694101817014e-05, "loss": 0.8489, "step": 3004 }, { "epoch": 0.46, "grad_norm": 2.9295376938033324, "learning_rate": 1.1772815307105027e-05, "loss": 0.9934, "step": 3005 }, { "epoch": 0.46, "grad_norm": 2.8432809249138042, "learning_rate": 1.176793607668476e-05, "loss": 0.9256, "step": 3006 }, { "epoch": 0.46, "grad_norm": 2.6080502082997423, "learning_rate": 1.176305641175539e-05, "loss": 0.8401, "step": 3007 }, { "epoch": 0.46, "grad_norm": 2.6194753528697103, "learning_rate": 1.17581763135162e-05, "loss": 0.8568, "step": 3008 }, { "epoch": 0.46, "grad_norm": 2.518047586237937, "learning_rate": 1.1753295783166581e-05, "loss": 0.8328, "step": 3009 }, { "epoch": 0.46, "grad_norm": 2.4931889524384694, "learning_rate": 1.1748414821906034e-05, "loss": 0.8611, "step": 3010 }, { "epoch": 0.46, "grad_norm": 2.540801850644384, "learning_rate": 1.1743533430934155e-05, "loss": 0.9411, "step": 3011 }, { "epoch": 0.46, "grad_norm": 2.7099336287100435, "learning_rate": 1.173865161145066e-05, "loss": 0.7831, "step": 3012 }, { "epoch": 0.46, "grad_norm": 3.11702898724108, "learning_rate": 1.1733769364655363e-05, "loss": 0.9089, "step": 3013 }, { "epoch": 0.46, "grad_norm": 5.378925417302546, "learning_rate": 1.1728886691748183e-05, "loss": 1.0161, "step": 3014 }, { "epoch": 0.46, "grad_norm": 2.7509304800419336, "learning_rate": 1.1724003593929138e-05, "loss": 0.9121, "step": 3015 }, { "epoch": 0.46, "grad_norm": 2.68915934169062, "learning_rate": 1.1719120072398361e-05, "loss": 0.8014, "step": 3016 }, { "epoch": 0.46, "grad_norm": 2.7399760762830856, "learning_rate": 1.1714236128356092e-05, "loss": 0.8724, "step": 3017 }, { "epoch": 0.46, "grad_norm": 3.030162280916283, "learning_rate": 1.1709351763002652e-05, "loss": 0.8158, "step": 3018 }, { "epoch": 0.46, "grad_norm": 2.585101360406029, "learning_rate": 1.1704466977538496e-05, "loss": 0.9078, "step": 3019 }, { "epoch": 0.46, "grad_norm": 2.7771498640982206, "learning_rate": 1.1699581773164155e-05, "loss": 0.8736, "step": 3020 }, { "epoch": 0.46, "grad_norm": 2.9162027035422398, "learning_rate": 1.1694696151080282e-05, "loss": 0.9148, "step": 3021 }, { "epoch": 0.46, "grad_norm": 2.674802039440239, "learning_rate": 1.1689810112487626e-05, "loss": 0.8939, "step": 3022 }, { "epoch": 0.46, "grad_norm": 2.9827837301978692, "learning_rate": 1.1684923658587036e-05, "loss": 0.7724, "step": 3023 }, { "epoch": 0.46, "grad_norm": 2.446858036060685, "learning_rate": 1.1680036790579465e-05, "loss": 0.8349, "step": 3024 }, { "epoch": 0.46, "grad_norm": 2.7490258161168355, "learning_rate": 1.1675149509665972e-05, "loss": 0.8318, "step": 3025 }, { "epoch": 0.46, "grad_norm": 2.758105078173259, "learning_rate": 1.167026181704771e-05, "loss": 0.8289, "step": 3026 }, { "epoch": 0.46, "grad_norm": 7.353607851225301, "learning_rate": 1.1665373713925936e-05, "loss": 1.0972, "step": 3027 }, { "epoch": 0.46, "grad_norm": 3.3166270913512403, "learning_rate": 1.1660485201502011e-05, "loss": 0.8928, "step": 3028 }, { "epoch": 0.46, "grad_norm": 2.5484423724642578, "learning_rate": 1.1655596280977395e-05, "loss": 0.8378, "step": 3029 }, { "epoch": 0.46, "grad_norm": 2.7828491941027105, "learning_rate": 1.1650706953553644e-05, "loss": 0.8506, "step": 3030 }, { "epoch": 0.46, "grad_norm": 3.222252561223075, "learning_rate": 1.1645817220432421e-05, "loss": 0.7596, "step": 3031 }, { "epoch": 0.46, "grad_norm": 2.8353519578274593, "learning_rate": 1.1640927082815485e-05, "loss": 0.8111, "step": 3032 }, { "epoch": 0.46, "grad_norm": 2.712517208964851, "learning_rate": 1.1636036541904692e-05, "loss": 0.9596, "step": 3033 }, { "epoch": 0.46, "grad_norm": 3.02873034434939, "learning_rate": 1.1631145598901999e-05, "loss": 0.934, "step": 3034 }, { "epoch": 0.46, "grad_norm": 2.7491870247460337, "learning_rate": 1.1626254255009465e-05, "loss": 0.8391, "step": 3035 }, { "epoch": 0.46, "grad_norm": 2.522093212738503, "learning_rate": 1.162136251142925e-05, "loss": 0.8147, "step": 3036 }, { "epoch": 0.46, "grad_norm": 3.03519444835517, "learning_rate": 1.1616470369363602e-05, "loss": 0.892, "step": 3037 }, { "epoch": 0.47, "grad_norm": 3.769866454847948, "learning_rate": 1.161157783001487e-05, "loss": 0.9277, "step": 3038 }, { "epoch": 0.47, "grad_norm": 2.8005906197669908, "learning_rate": 1.1606684894585507e-05, "loss": 0.9335, "step": 3039 }, { "epoch": 0.47, "grad_norm": 2.6771495503120724, "learning_rate": 1.1601791564278057e-05, "loss": 0.8585, "step": 3040 }, { "epoch": 0.47, "grad_norm": 2.7458170630301493, "learning_rate": 1.1596897840295165e-05, "loss": 0.9842, "step": 3041 }, { "epoch": 0.47, "grad_norm": 2.42988492519934, "learning_rate": 1.159200372383957e-05, "loss": 0.8292, "step": 3042 }, { "epoch": 0.47, "grad_norm": 2.5103081399212375, "learning_rate": 1.1587109216114111e-05, "loss": 0.9228, "step": 3043 }, { "epoch": 0.47, "grad_norm": 2.589221115239344, "learning_rate": 1.1582214318321718e-05, "loss": 0.8647, "step": 3044 }, { "epoch": 0.47, "grad_norm": 4.811962497245336, "learning_rate": 1.1577319031665419e-05, "loss": 1.0228, "step": 3045 }, { "epoch": 0.47, "grad_norm": 2.6507114110052887, "learning_rate": 1.157242335734834e-05, "loss": 0.845, "step": 3046 }, { "epoch": 0.47, "grad_norm": 2.8573693138518306, "learning_rate": 1.1567527296573702e-05, "loss": 0.87, "step": 3047 }, { "epoch": 0.47, "grad_norm": 2.457127461940877, "learning_rate": 1.1562630850544816e-05, "loss": 0.86, "step": 3048 }, { "epoch": 0.47, "grad_norm": 2.556657489478441, "learning_rate": 1.1557734020465093e-05, "loss": 0.7787, "step": 3049 }, { "epoch": 0.47, "grad_norm": 4.76817538447232, "learning_rate": 1.1552836807538034e-05, "loss": 1.0745, "step": 3050 }, { "epoch": 0.47, "grad_norm": 2.828019544724989, "learning_rate": 1.154793921296724e-05, "loss": 0.7933, "step": 3051 }, { "epoch": 0.47, "grad_norm": 3.9926852113478217, "learning_rate": 1.1543041237956403e-05, "loss": 0.9776, "step": 3052 }, { "epoch": 0.47, "grad_norm": 2.7029689770823206, "learning_rate": 1.1538142883709305e-05, "loss": 0.9321, "step": 3053 }, { "epoch": 0.47, "grad_norm": 2.819858750783005, "learning_rate": 1.1533244151429825e-05, "loss": 0.8614, "step": 3054 }, { "epoch": 0.47, "grad_norm": 2.5754083770125895, "learning_rate": 1.1528345042321933e-05, "loss": 0.8534, "step": 3055 }, { "epoch": 0.47, "grad_norm": 2.808109444366165, "learning_rate": 1.1523445557589692e-05, "loss": 0.8464, "step": 3056 }, { "epoch": 0.47, "grad_norm": 2.642707924896552, "learning_rate": 1.1518545698437262e-05, "loss": 0.8235, "step": 3057 }, { "epoch": 0.47, "grad_norm": 2.793040715733123, "learning_rate": 1.1513645466068887e-05, "loss": 0.7877, "step": 3058 }, { "epoch": 0.47, "grad_norm": 3.2252924252687065, "learning_rate": 1.1508744861688912e-05, "loss": 0.7785, "step": 3059 }, { "epoch": 0.47, "grad_norm": 2.9322617811791116, "learning_rate": 1.150384388650176e-05, "loss": 0.8891, "step": 3060 }, { "epoch": 0.47, "grad_norm": 3.3079267028920536, "learning_rate": 1.149894254171196e-05, "loss": 0.8218, "step": 3061 }, { "epoch": 0.47, "grad_norm": 2.826653081264617, "learning_rate": 1.1494040828524122e-05, "loss": 0.8896, "step": 3062 }, { "epoch": 0.47, "grad_norm": 2.499685351473289, "learning_rate": 1.1489138748142949e-05, "loss": 0.8141, "step": 3063 }, { "epoch": 0.47, "grad_norm": 3.0839843943225453, "learning_rate": 1.1484236301773239e-05, "loss": 0.797, "step": 3064 }, { "epoch": 0.47, "grad_norm": 2.9392104478446295, "learning_rate": 1.1479333490619873e-05, "loss": 0.8057, "step": 3065 }, { "epoch": 0.47, "grad_norm": 2.544480005618585, "learning_rate": 1.147443031588782e-05, "loss": 0.8691, "step": 3066 }, { "epoch": 0.47, "grad_norm": 2.5467653566420805, "learning_rate": 1.146952677878215e-05, "loss": 0.7424, "step": 3067 }, { "epoch": 0.47, "grad_norm": 2.62107278620751, "learning_rate": 1.1464622880508012e-05, "loss": 0.967, "step": 3068 }, { "epoch": 0.47, "grad_norm": 6.543148916423793, "learning_rate": 1.1459718622270648e-05, "loss": 1.0334, "step": 3069 }, { "epoch": 0.47, "grad_norm": 2.4324678600471663, "learning_rate": 1.1454814005275388e-05, "loss": 0.8146, "step": 3070 }, { "epoch": 0.47, "grad_norm": 2.8998473892388326, "learning_rate": 1.1449909030727641e-05, "loss": 0.8289, "step": 3071 }, { "epoch": 0.47, "grad_norm": 2.7750117258591485, "learning_rate": 1.1445003699832922e-05, "loss": 1.0167, "step": 3072 }, { "epoch": 0.47, "grad_norm": 2.8018037931812034, "learning_rate": 1.144009801379682e-05, "loss": 0.9312, "step": 3073 }, { "epoch": 0.47, "grad_norm": 2.789138018988291, "learning_rate": 1.1435191973825015e-05, "loss": 0.9369, "step": 3074 }, { "epoch": 0.47, "grad_norm": 2.692334241154601, "learning_rate": 1.1430285581123278e-05, "loss": 0.911, "step": 3075 }, { "epoch": 0.47, "grad_norm": 2.5145506813955163, "learning_rate": 1.1425378836897457e-05, "loss": 0.8242, "step": 3076 }, { "epoch": 0.47, "grad_norm": 2.4979403035686945, "learning_rate": 1.1420471742353491e-05, "loss": 0.9209, "step": 3077 }, { "epoch": 0.47, "grad_norm": 2.502264436899139, "learning_rate": 1.141556429869741e-05, "loss": 0.806, "step": 3078 }, { "epoch": 0.47, "grad_norm": 2.734742905570955, "learning_rate": 1.1410656507135328e-05, "loss": 0.8528, "step": 3079 }, { "epoch": 0.47, "grad_norm": 2.722677435806108, "learning_rate": 1.1405748368873438e-05, "loss": 0.8267, "step": 3080 }, { "epoch": 0.47, "grad_norm": 2.6633849719505664, "learning_rate": 1.1400839885118026e-05, "loss": 0.8519, "step": 3081 }, { "epoch": 0.47, "grad_norm": 2.831404582758861, "learning_rate": 1.1395931057075455e-05, "loss": 0.839, "step": 3082 }, { "epoch": 0.47, "grad_norm": 2.7737992784725893, "learning_rate": 1.1391021885952182e-05, "loss": 0.8881, "step": 3083 }, { "epoch": 0.47, "grad_norm": 5.287806551319888, "learning_rate": 1.1386112372954745e-05, "loss": 0.9836, "step": 3084 }, { "epoch": 0.47, "grad_norm": 4.687326202298055, "learning_rate": 1.138120251928976e-05, "loss": 1.0422, "step": 3085 }, { "epoch": 0.47, "grad_norm": 2.5190937625243697, "learning_rate": 1.137629232616393e-05, "loss": 0.8285, "step": 3086 }, { "epoch": 0.47, "grad_norm": 2.6728531205610087, "learning_rate": 1.1371381794784051e-05, "loss": 0.8709, "step": 3087 }, { "epoch": 0.47, "grad_norm": 3.1875883834182965, "learning_rate": 1.1366470926356986e-05, "loss": 0.8351, "step": 3088 }, { "epoch": 0.47, "grad_norm": 2.6343183413218236, "learning_rate": 1.1361559722089691e-05, "loss": 0.9105, "step": 3089 }, { "epoch": 0.47, "grad_norm": 5.273978479754304, "learning_rate": 1.1356648183189203e-05, "loss": 1.0424, "step": 3090 }, { "epoch": 0.47, "grad_norm": 2.8158695606170863, "learning_rate": 1.1351736310862642e-05, "loss": 0.8838, "step": 3091 }, { "epoch": 0.47, "grad_norm": 2.808634999693899, "learning_rate": 1.1346824106317204e-05, "loss": 0.7946, "step": 3092 }, { "epoch": 0.47, "grad_norm": 2.9091587714578644, "learning_rate": 1.1341911570760176e-05, "loss": 0.8636, "step": 3093 }, { "epoch": 0.47, "grad_norm": 2.481053395723128, "learning_rate": 1.1336998705398918e-05, "loss": 0.8706, "step": 3094 }, { "epoch": 0.47, "grad_norm": 2.7924999881066985, "learning_rate": 1.1332085511440877e-05, "loss": 0.9168, "step": 3095 }, { "epoch": 0.47, "grad_norm": 2.7065320524178937, "learning_rate": 1.1327171990093574e-05, "loss": 0.8494, "step": 3096 }, { "epoch": 0.47, "grad_norm": 2.798614283817104, "learning_rate": 1.1322258142564619e-05, "loss": 0.8357, "step": 3097 }, { "epoch": 0.47, "grad_norm": 3.9673905386874635, "learning_rate": 1.13173439700617e-05, "loss": 0.9729, "step": 3098 }, { "epoch": 0.47, "grad_norm": 2.669600900005444, "learning_rate": 1.1312429473792576e-05, "loss": 0.8486, "step": 3099 }, { "epoch": 0.47, "grad_norm": 2.820936598288706, "learning_rate": 1.1307514654965097e-05, "loss": 0.8991, "step": 3100 }, { "epoch": 0.47, "grad_norm": 2.6125223944829936, "learning_rate": 1.1302599514787186e-05, "loss": 0.8566, "step": 3101 }, { "epoch": 0.47, "grad_norm": 2.739922897046682, "learning_rate": 1.129768405446685e-05, "loss": 0.9179, "step": 3102 }, { "epoch": 0.47, "grad_norm": 2.7433379542441787, "learning_rate": 1.1292768275212162e-05, "loss": 0.8841, "step": 3103 }, { "epoch": 0.48, "grad_norm": 2.670627406032204, "learning_rate": 1.1287852178231295e-05, "loss": 0.8187, "step": 3104 }, { "epoch": 0.48, "grad_norm": 2.9365550668107816, "learning_rate": 1.1282935764732477e-05, "loss": 0.9242, "step": 3105 }, { "epoch": 0.48, "grad_norm": 2.755246641824546, "learning_rate": 1.1278019035924032e-05, "loss": 0.9124, "step": 3106 }, { "epoch": 0.48, "grad_norm": 2.7649728926786143, "learning_rate": 1.1273101993014351e-05, "loss": 0.782, "step": 3107 }, { "epoch": 0.48, "grad_norm": 3.1261831995171527, "learning_rate": 1.1268184637211905e-05, "loss": 0.8278, "step": 3108 }, { "epoch": 0.48, "grad_norm": 2.6353217053491713, "learning_rate": 1.1263266969725244e-05, "loss": 0.893, "step": 3109 }, { "epoch": 0.48, "grad_norm": 2.8645760552772144, "learning_rate": 1.1258348991762994e-05, "loss": 0.7923, "step": 3110 }, { "epoch": 0.48, "grad_norm": 2.891997003857544, "learning_rate": 1.1253430704533847e-05, "loss": 0.7817, "step": 3111 }, { "epoch": 0.48, "grad_norm": 2.624875915617395, "learning_rate": 1.124851210924659e-05, "loss": 0.8476, "step": 3112 }, { "epoch": 0.48, "grad_norm": 2.7688792539306153, "learning_rate": 1.1243593207110073e-05, "loss": 0.8592, "step": 3113 }, { "epoch": 0.48, "grad_norm": 2.7742296596297518, "learning_rate": 1.1238673999333223e-05, "loss": 0.8803, "step": 3114 }, { "epoch": 0.48, "grad_norm": 2.793489256791983, "learning_rate": 1.1233754487125043e-05, "loss": 0.9501, "step": 3115 }, { "epoch": 0.48, "grad_norm": 2.813680144743365, "learning_rate": 1.1228834671694613e-05, "loss": 0.839, "step": 3116 }, { "epoch": 0.48, "grad_norm": 4.916078405373159, "learning_rate": 1.1223914554251085e-05, "loss": 1.0373, "step": 3117 }, { "epoch": 0.48, "grad_norm": 2.610000965016514, "learning_rate": 1.1218994136003685e-05, "loss": 0.8629, "step": 3118 }, { "epoch": 0.48, "grad_norm": 2.7625346058741904, "learning_rate": 1.1214073418161712e-05, "loss": 0.9373, "step": 3119 }, { "epoch": 0.48, "grad_norm": 2.6943411479104284, "learning_rate": 1.1209152401934546e-05, "loss": 0.8694, "step": 3120 }, { "epoch": 0.48, "grad_norm": 2.861777936032375, "learning_rate": 1.1204231088531631e-05, "loss": 0.7413, "step": 3121 }, { "epoch": 0.48, "grad_norm": 2.5235310218243376, "learning_rate": 1.1199309479162489e-05, "loss": 0.8644, "step": 3122 }, { "epoch": 0.48, "grad_norm": 2.497572177741026, "learning_rate": 1.119438757503671e-05, "loss": 0.8023, "step": 3123 }, { "epoch": 0.48, "grad_norm": 2.6581250101609863, "learning_rate": 1.1189465377363964e-05, "loss": 0.8173, "step": 3124 }, { "epoch": 0.48, "grad_norm": 2.7763787999992227, "learning_rate": 1.118454288735399e-05, "loss": 0.9271, "step": 3125 }, { "epoch": 0.48, "grad_norm": 2.5574303042021675, "learning_rate": 1.1179620106216597e-05, "loss": 0.9474, "step": 3126 }, { "epoch": 0.48, "grad_norm": 2.5624182985693587, "learning_rate": 1.117469703516166e-05, "loss": 0.8598, "step": 3127 }, { "epoch": 0.48, "grad_norm": 2.3757353182751983, "learning_rate": 1.1169773675399144e-05, "loss": 0.8019, "step": 3128 }, { "epoch": 0.48, "grad_norm": 2.690347384996987, "learning_rate": 1.1164850028139063e-05, "loss": 0.8593, "step": 3129 }, { "epoch": 0.48, "grad_norm": 2.4945781694298197, "learning_rate": 1.1159926094591514e-05, "loss": 0.8506, "step": 3130 }, { "epoch": 0.48, "grad_norm": 2.655067345590805, "learning_rate": 1.1155001875966663e-05, "loss": 0.894, "step": 3131 }, { "epoch": 0.48, "grad_norm": 2.5126207449004463, "learning_rate": 1.1150077373474745e-05, "loss": 0.7716, "step": 3132 }, { "epoch": 0.48, "grad_norm": 2.3273236917279556, "learning_rate": 1.1145152588326063e-05, "loss": 0.75, "step": 3133 }, { "epoch": 0.48, "grad_norm": 2.7025550289889817, "learning_rate": 1.1140227521730988e-05, "loss": 0.7937, "step": 3134 }, { "epoch": 0.48, "grad_norm": 2.742824070165681, "learning_rate": 1.1135302174899971e-05, "loss": 0.8877, "step": 3135 }, { "epoch": 0.48, "grad_norm": 3.237256074327542, "learning_rate": 1.113037654904352e-05, "loss": 0.8987, "step": 3136 }, { "epoch": 0.48, "grad_norm": 2.9438627721310087, "learning_rate": 1.1125450645372218e-05, "loss": 0.8522, "step": 3137 }, { "epoch": 0.48, "grad_norm": 2.886165850380071, "learning_rate": 1.1120524465096706e-05, "loss": 0.8938, "step": 3138 }, { "epoch": 0.48, "grad_norm": 2.656321602028388, "learning_rate": 1.1115598009427712e-05, "loss": 0.8773, "step": 3139 }, { "epoch": 0.48, "grad_norm": 2.6816869315729543, "learning_rate": 1.1110671279576014e-05, "loss": 0.8806, "step": 3140 }, { "epoch": 0.48, "grad_norm": 2.7861332807618555, "learning_rate": 1.1105744276752464e-05, "loss": 0.8534, "step": 3141 }, { "epoch": 0.48, "grad_norm": 2.7415889420632036, "learning_rate": 1.1100817002167983e-05, "loss": 0.8204, "step": 3142 }, { "epoch": 0.48, "grad_norm": 2.853777558312234, "learning_rate": 1.1095889457033557e-05, "loss": 0.8472, "step": 3143 }, { "epoch": 0.48, "grad_norm": 2.679333044964318, "learning_rate": 1.1090961642560238e-05, "loss": 0.8814, "step": 3144 }, { "epoch": 0.48, "grad_norm": 2.712063692602172, "learning_rate": 1.1086033559959143e-05, "loss": 0.8651, "step": 3145 }, { "epoch": 0.48, "grad_norm": 2.901893289890703, "learning_rate": 1.1081105210441458e-05, "loss": 0.9463, "step": 3146 }, { "epoch": 0.48, "grad_norm": 2.622942948873725, "learning_rate": 1.1076176595218438e-05, "loss": 0.8437, "step": 3147 }, { "epoch": 0.48, "grad_norm": 2.784225144697118, "learning_rate": 1.1071247715501387e-05, "loss": 0.9422, "step": 3148 }, { "epoch": 0.48, "grad_norm": 2.784905531348584, "learning_rate": 1.1066318572501695e-05, "loss": 0.857, "step": 3149 }, { "epoch": 0.48, "grad_norm": 2.614670266049605, "learning_rate": 1.1061389167430804e-05, "loss": 0.899, "step": 3150 }, { "epoch": 0.48, "grad_norm": 2.786304158975111, "learning_rate": 1.1056459501500223e-05, "loss": 0.9484, "step": 3151 }, { "epoch": 0.48, "grad_norm": 2.7894077314963335, "learning_rate": 1.1051529575921525e-05, "loss": 0.9577, "step": 3152 }, { "epoch": 0.48, "grad_norm": 2.621899779821202, "learning_rate": 1.1046599391906347e-05, "loss": 0.7899, "step": 3153 }, { "epoch": 0.48, "grad_norm": 2.7178037657407033, "learning_rate": 1.1041668950666395e-05, "loss": 0.7923, "step": 3154 }, { "epoch": 0.48, "grad_norm": 2.387761205410121, "learning_rate": 1.1036738253413431e-05, "loss": 0.7847, "step": 3155 }, { "epoch": 0.48, "grad_norm": 2.6422778659156134, "learning_rate": 1.1031807301359273e-05, "loss": 0.9249, "step": 3156 }, { "epoch": 0.48, "grad_norm": 2.9058694607373634, "learning_rate": 1.1026876095715825e-05, "loss": 0.8803, "step": 3157 }, { "epoch": 0.48, "grad_norm": 5.628653013302643, "learning_rate": 1.1021944637695032e-05, "loss": 0.9861, "step": 3158 }, { "epoch": 0.48, "grad_norm": 2.639775493358449, "learning_rate": 1.1017012928508905e-05, "loss": 0.8768, "step": 3159 }, { "epoch": 0.48, "grad_norm": 2.6560270557217853, "learning_rate": 1.1012080969369527e-05, "loss": 0.8731, "step": 3160 }, { "epoch": 0.48, "grad_norm": 2.9183755912857343, "learning_rate": 1.1007148761489031e-05, "loss": 0.8827, "step": 3161 }, { "epoch": 0.48, "grad_norm": 2.6162305768841576, "learning_rate": 1.1002216306079616e-05, "loss": 0.8166, "step": 3162 }, { "epoch": 0.48, "grad_norm": 2.580769173010447, "learning_rate": 1.099728360435354e-05, "loss": 0.9338, "step": 3163 }, { "epoch": 0.48, "grad_norm": 2.548212107677192, "learning_rate": 1.0992350657523123e-05, "loss": 0.7659, "step": 3164 }, { "epoch": 0.48, "grad_norm": 3.048637681072861, "learning_rate": 1.0987417466800749e-05, "loss": 0.9659, "step": 3165 }, { "epoch": 0.48, "grad_norm": 2.6843483103040544, "learning_rate": 1.0982484033398855e-05, "loss": 0.8631, "step": 3166 }, { "epoch": 0.48, "grad_norm": 2.6588237759332394, "learning_rate": 1.0977550358529935e-05, "loss": 0.8909, "step": 3167 }, { "epoch": 0.48, "grad_norm": 2.8436832837871253, "learning_rate": 1.0972616443406558e-05, "loss": 0.834, "step": 3168 }, { "epoch": 0.49, "grad_norm": 2.8045928596793903, "learning_rate": 1.0967682289241337e-05, "loss": 0.8808, "step": 3169 }, { "epoch": 0.49, "grad_norm": 2.7498769351854677, "learning_rate": 1.0962747897246949e-05, "loss": 0.8705, "step": 3170 }, { "epoch": 0.49, "grad_norm": 2.709629558405918, "learning_rate": 1.0957813268636127e-05, "loss": 0.8877, "step": 3171 }, { "epoch": 0.49, "grad_norm": 2.6672565570216595, "learning_rate": 1.0952878404621667e-05, "loss": 0.9211, "step": 3172 }, { "epoch": 0.49, "grad_norm": 2.855523920099627, "learning_rate": 1.0947943306416422e-05, "loss": 0.834, "step": 3173 }, { "epoch": 0.49, "grad_norm": 2.8752440568494277, "learning_rate": 1.0943007975233296e-05, "loss": 0.8943, "step": 3174 }, { "epoch": 0.49, "grad_norm": 2.7796180474907732, "learning_rate": 1.0938072412285257e-05, "loss": 0.9756, "step": 3175 }, { "epoch": 0.49, "grad_norm": 2.637847864052865, "learning_rate": 1.093313661878533e-05, "loss": 0.843, "step": 3176 }, { "epoch": 0.49, "grad_norm": 2.516870968925292, "learning_rate": 1.0928200595946594e-05, "loss": 0.8225, "step": 3177 }, { "epoch": 0.49, "grad_norm": 2.8040107938246868, "learning_rate": 1.092326434498218e-05, "loss": 0.8971, "step": 3178 }, { "epoch": 0.49, "grad_norm": 6.312917984262526, "learning_rate": 1.0918327867105284e-05, "loss": 1.0743, "step": 3179 }, { "epoch": 0.49, "grad_norm": 2.6661378147021946, "learning_rate": 1.0913391163529158e-05, "loss": 0.8194, "step": 3180 }, { "epoch": 0.49, "grad_norm": 2.923249564548845, "learning_rate": 1.0908454235467099e-05, "loss": 0.9235, "step": 3181 }, { "epoch": 0.49, "grad_norm": 4.124631933822971, "learning_rate": 1.0903517084132469e-05, "loss": 0.7906, "step": 3182 }, { "epoch": 0.49, "grad_norm": 2.6608958546046586, "learning_rate": 1.0898579710738675e-05, "loss": 0.9203, "step": 3183 }, { "epoch": 0.49, "grad_norm": 2.737613006287347, "learning_rate": 1.0893642116499194e-05, "loss": 0.95, "step": 3184 }, { "epoch": 0.49, "grad_norm": 2.804888270338999, "learning_rate": 1.0888704302627542e-05, "loss": 0.8623, "step": 3185 }, { "epoch": 0.49, "grad_norm": 2.710517749370958, "learning_rate": 1.0883766270337297e-05, "loss": 0.7938, "step": 3186 }, { "epoch": 0.49, "grad_norm": 2.846565399790556, "learning_rate": 1.0878828020842091e-05, "loss": 0.8408, "step": 3187 }, { "epoch": 0.49, "grad_norm": 3.2049410040769426, "learning_rate": 1.0873889555355606e-05, "loss": 0.845, "step": 3188 }, { "epoch": 0.49, "grad_norm": 2.9466179850540923, "learning_rate": 1.0868950875091573e-05, "loss": 0.8936, "step": 3189 }, { "epoch": 0.49, "grad_norm": 2.6737815841660186, "learning_rate": 1.0864011981263786e-05, "loss": 0.9218, "step": 3190 }, { "epoch": 0.49, "grad_norm": 2.620826306048308, "learning_rate": 1.085907287508609e-05, "loss": 0.9462, "step": 3191 }, { "epoch": 0.49, "grad_norm": 4.778997897619199, "learning_rate": 1.0854133557772373e-05, "loss": 0.9397, "step": 3192 }, { "epoch": 0.49, "grad_norm": 2.5247511860554677, "learning_rate": 1.0849194030536583e-05, "loss": 0.9028, "step": 3193 }, { "epoch": 0.49, "grad_norm": 2.4814349353822736, "learning_rate": 1.0844254294592716e-05, "loss": 0.7551, "step": 3194 }, { "epoch": 0.49, "grad_norm": 2.5717710565669205, "learning_rate": 1.0839314351154821e-05, "loss": 0.8836, "step": 3195 }, { "epoch": 0.49, "grad_norm": 3.061981208422528, "learning_rate": 1.0834374201436996e-05, "loss": 0.8326, "step": 3196 }, { "epoch": 0.49, "grad_norm": 4.710682198564873, "learning_rate": 1.0829433846653397e-05, "loss": 0.9966, "step": 3197 }, { "epoch": 0.49, "grad_norm": 2.631961606425523, "learning_rate": 1.082449328801822e-05, "loss": 0.9213, "step": 3198 }, { "epoch": 0.49, "grad_norm": 2.718721534981861, "learning_rate": 1.0819552526745716e-05, "loss": 0.7187, "step": 3199 }, { "epoch": 0.49, "grad_norm": 2.6937766875932736, "learning_rate": 1.0814611564050186e-05, "loss": 0.8894, "step": 3200 }, { "epoch": 0.49, "grad_norm": 2.8889819332593887, "learning_rate": 1.080967040114598e-05, "loss": 0.8874, "step": 3201 }, { "epoch": 0.49, "grad_norm": 2.7937136356082792, "learning_rate": 1.08047290392475e-05, "loss": 0.8424, "step": 3202 }, { "epoch": 0.49, "grad_norm": 2.644207184621602, "learning_rate": 1.0799787479569188e-05, "loss": 0.8399, "step": 3203 }, { "epoch": 0.49, "grad_norm": 3.3400959435695334, "learning_rate": 1.0794845723325544e-05, "loss": 0.8781, "step": 3204 }, { "epoch": 0.49, "grad_norm": 2.9983667609443976, "learning_rate": 1.0789903771731118e-05, "loss": 0.8109, "step": 3205 }, { "epoch": 0.49, "grad_norm": 2.617913805844195, "learning_rate": 1.0784961626000497e-05, "loss": 0.831, "step": 3206 }, { "epoch": 0.49, "grad_norm": 2.676909320940646, "learning_rate": 1.0780019287348321e-05, "loss": 0.9348, "step": 3207 }, { "epoch": 0.49, "grad_norm": 2.4113877770088146, "learning_rate": 1.0775076756989281e-05, "loss": 0.7715, "step": 3208 }, { "epoch": 0.49, "grad_norm": 2.873965751785654, "learning_rate": 1.0770134036138114e-05, "loss": 0.863, "step": 3209 }, { "epoch": 0.49, "grad_norm": 4.234018267681649, "learning_rate": 1.07651911260096e-05, "loss": 1.0551, "step": 3210 }, { "epoch": 0.49, "grad_norm": 2.601399561591246, "learning_rate": 1.0760248027818566e-05, "loss": 0.8906, "step": 3211 }, { "epoch": 0.49, "grad_norm": 2.5780137439885213, "learning_rate": 1.0755304742779891e-05, "loss": 0.8582, "step": 3212 }, { "epoch": 0.49, "grad_norm": 2.7272475315349687, "learning_rate": 1.0750361272108492e-05, "loss": 0.9181, "step": 3213 }, { "epoch": 0.49, "grad_norm": 3.0256202385156685, "learning_rate": 1.0745417617019336e-05, "loss": 0.9568, "step": 3214 }, { "epoch": 0.49, "grad_norm": 2.515863450716289, "learning_rate": 1.0740473778727436e-05, "loss": 0.7738, "step": 3215 }, { "epoch": 0.49, "grad_norm": 2.787365827288811, "learning_rate": 1.0735529758447851e-05, "loss": 0.9147, "step": 3216 }, { "epoch": 0.49, "grad_norm": 2.687163583453802, "learning_rate": 1.0730585557395682e-05, "loss": 0.9083, "step": 3217 }, { "epoch": 0.49, "grad_norm": 2.498617602623131, "learning_rate": 1.0725641176786066e-05, "loss": 0.8464, "step": 3218 }, { "epoch": 0.49, "grad_norm": 2.669957505128777, "learning_rate": 1.0720696617834203e-05, "loss": 0.8533, "step": 3219 }, { "epoch": 0.49, "grad_norm": 2.7615180189226725, "learning_rate": 1.0715751881755322e-05, "loss": 0.8023, "step": 3220 }, { "epoch": 0.49, "grad_norm": 2.84954465106346, "learning_rate": 1.0710806969764708e-05, "loss": 0.7787, "step": 3221 }, { "epoch": 0.49, "grad_norm": 2.761029530875673, "learning_rate": 1.0705861883077676e-05, "loss": 0.8775, "step": 3222 }, { "epoch": 0.49, "grad_norm": 2.7200088736275156, "learning_rate": 1.0700916622909584e-05, "loss": 0.8918, "step": 3223 }, { "epoch": 0.49, "grad_norm": 4.580459203264464, "learning_rate": 1.069597119047585e-05, "loss": 0.979, "step": 3224 }, { "epoch": 0.49, "grad_norm": 2.784799576334272, "learning_rate": 1.0691025586991913e-05, "loss": 0.8962, "step": 3225 }, { "epoch": 0.49, "grad_norm": 2.716320206142077, "learning_rate": 1.0686079813673266e-05, "loss": 0.8454, "step": 3226 }, { "epoch": 0.49, "grad_norm": 2.78252057186699, "learning_rate": 1.0681133871735447e-05, "loss": 0.7963, "step": 3227 }, { "epoch": 0.49, "grad_norm": 2.797761111042213, "learning_rate": 1.0676187762394024e-05, "loss": 0.8693, "step": 3228 }, { "epoch": 0.49, "grad_norm": 2.6328034403208016, "learning_rate": 1.0671241486864612e-05, "loss": 0.831, "step": 3229 }, { "epoch": 0.49, "grad_norm": 2.509790499903019, "learning_rate": 1.0666295046362866e-05, "loss": 0.8735, "step": 3230 }, { "epoch": 0.49, "grad_norm": 2.559415932905526, "learning_rate": 1.0661348442104488e-05, "loss": 0.8074, "step": 3231 }, { "epoch": 0.49, "grad_norm": 2.7346927684237206, "learning_rate": 1.0656401675305213e-05, "loss": 0.8779, "step": 3232 }, { "epoch": 0.49, "grad_norm": 2.790203708689786, "learning_rate": 1.0651454747180814e-05, "loss": 0.8612, "step": 3233 }, { "epoch": 0.5, "grad_norm": 2.570354502414417, "learning_rate": 1.0646507658947107e-05, "loss": 0.7987, "step": 3234 }, { "epoch": 0.5, "grad_norm": 2.864626432776611, "learning_rate": 1.0641560411819949e-05, "loss": 0.8281, "step": 3235 }, { "epoch": 0.5, "grad_norm": 2.703277856066407, "learning_rate": 1.0636613007015237e-05, "loss": 0.8933, "step": 3236 }, { "epoch": 0.5, "grad_norm": 2.5346887053544522, "learning_rate": 1.0631665445748903e-05, "loss": 0.8432, "step": 3237 }, { "epoch": 0.5, "grad_norm": 2.6720590127577113, "learning_rate": 1.0626717729236916e-05, "loss": 0.8069, "step": 3238 }, { "epoch": 0.5, "grad_norm": 2.696786972098412, "learning_rate": 1.062176985869529e-05, "loss": 0.8801, "step": 3239 }, { "epoch": 0.5, "grad_norm": 2.697229729382102, "learning_rate": 1.061682183534007e-05, "loss": 0.9402, "step": 3240 }, { "epoch": 0.5, "grad_norm": 2.948636608691428, "learning_rate": 1.0611873660387342e-05, "loss": 0.8494, "step": 3241 }, { "epoch": 0.5, "grad_norm": 2.8446098507092406, "learning_rate": 1.0606925335053227e-05, "loss": 0.8413, "step": 3242 }, { "epoch": 0.5, "grad_norm": 2.728881060896302, "learning_rate": 1.060197686055389e-05, "loss": 0.9709, "step": 3243 }, { "epoch": 0.5, "grad_norm": 2.7895855154963085, "learning_rate": 1.0597028238105524e-05, "loss": 0.7959, "step": 3244 }, { "epoch": 0.5, "grad_norm": 2.6499197839840463, "learning_rate": 1.0592079468924359e-05, "loss": 0.7914, "step": 3245 }, { "epoch": 0.5, "grad_norm": 4.911507768646488, "learning_rate": 1.0587130554226665e-05, "loss": 0.9737, "step": 3246 }, { "epoch": 0.5, "grad_norm": 2.900046966280705, "learning_rate": 1.0582181495228751e-05, "loss": 0.9709, "step": 3247 }, { "epoch": 0.5, "grad_norm": 2.590115515221143, "learning_rate": 1.0577232293146951e-05, "loss": 0.9294, "step": 3248 }, { "epoch": 0.5, "grad_norm": 2.5526063817207385, "learning_rate": 1.0572282949197646e-05, "loss": 0.8423, "step": 3249 }, { "epoch": 0.5, "grad_norm": 2.53629469601658, "learning_rate": 1.0567333464597238e-05, "loss": 0.9556, "step": 3250 }, { "epoch": 0.5, "grad_norm": 5.980707695112363, "learning_rate": 1.0562383840562179e-05, "loss": 0.9552, "step": 3251 }, { "epoch": 0.5, "grad_norm": 2.495283589020335, "learning_rate": 1.0557434078308941e-05, "loss": 0.7907, "step": 3252 }, { "epoch": 0.5, "grad_norm": 2.518120801724238, "learning_rate": 1.0552484179054041e-05, "loss": 0.8885, "step": 3253 }, { "epoch": 0.5, "grad_norm": 2.7614692883101126, "learning_rate": 1.0547534144014027e-05, "loss": 0.8517, "step": 3254 }, { "epoch": 0.5, "grad_norm": 2.8019226736605187, "learning_rate": 1.0542583974405476e-05, "loss": 0.9192, "step": 3255 }, { "epoch": 0.5, "grad_norm": 2.79675855043032, "learning_rate": 1.0537633671445002e-05, "loss": 0.8881, "step": 3256 }, { "epoch": 0.5, "grad_norm": 2.265933060436887, "learning_rate": 1.0532683236349248e-05, "loss": 0.7393, "step": 3257 }, { "epoch": 0.5, "grad_norm": 2.526929479088268, "learning_rate": 1.0527732670334897e-05, "loss": 0.8796, "step": 3258 }, { "epoch": 0.5, "grad_norm": 2.520864599696683, "learning_rate": 1.0522781974618652e-05, "loss": 0.8469, "step": 3259 }, { "epoch": 0.5, "grad_norm": 2.7165796819432604, "learning_rate": 1.0517831150417264e-05, "loss": 0.8224, "step": 3260 }, { "epoch": 0.5, "grad_norm": 2.6270000648683722, "learning_rate": 1.0512880198947501e-05, "loss": 0.938, "step": 3261 }, { "epoch": 0.5, "grad_norm": 2.79444465784981, "learning_rate": 1.050792912142617e-05, "loss": 0.7442, "step": 3262 }, { "epoch": 0.5, "grad_norm": 2.5324615943017106, "learning_rate": 1.0502977919070106e-05, "loss": 0.8856, "step": 3263 }, { "epoch": 0.5, "grad_norm": 2.533648583070449, "learning_rate": 1.0498026593096174e-05, "loss": 0.8138, "step": 3264 }, { "epoch": 0.5, "grad_norm": 2.483789225555165, "learning_rate": 1.0493075144721274e-05, "loss": 0.7558, "step": 3265 }, { "epoch": 0.5, "grad_norm": 2.8208179904872566, "learning_rate": 1.0488123575162332e-05, "loss": 0.8044, "step": 3266 }, { "epoch": 0.5, "grad_norm": 2.618894893257103, "learning_rate": 1.0483171885636307e-05, "loss": 0.8982, "step": 3267 }, { "epoch": 0.5, "grad_norm": 2.8409111924604673, "learning_rate": 1.0478220077360184e-05, "loss": 0.7764, "step": 3268 }, { "epoch": 0.5, "grad_norm": 3.6090076484711986, "learning_rate": 1.0473268151550977e-05, "loss": 0.9362, "step": 3269 }, { "epoch": 0.5, "grad_norm": 2.9134663249316786, "learning_rate": 1.0468316109425732e-05, "loss": 0.8487, "step": 3270 }, { "epoch": 0.5, "grad_norm": 2.5389919301057446, "learning_rate": 1.046336395220152e-05, "loss": 0.8479, "step": 3271 }, { "epoch": 0.5, "grad_norm": 2.4828818279246376, "learning_rate": 1.0458411681095444e-05, "loss": 0.8475, "step": 3272 }, { "epoch": 0.5, "grad_norm": 2.8052594263529405, "learning_rate": 1.0453459297324638e-05, "loss": 0.7965, "step": 3273 }, { "epoch": 0.5, "grad_norm": 6.018704108526814, "learning_rate": 1.0448506802106248e-05, "loss": 1.0924, "step": 3274 }, { "epoch": 0.5, "grad_norm": 2.7527598038819567, "learning_rate": 1.0443554196657468e-05, "loss": 0.908, "step": 3275 }, { "epoch": 0.5, "grad_norm": 3.1159386946264753, "learning_rate": 1.0438601482195507e-05, "loss": 0.9319, "step": 3276 }, { "epoch": 0.5, "grad_norm": 2.705038646126299, "learning_rate": 1.0433648659937604e-05, "loss": 0.813, "step": 3277 }, { "epoch": 0.5, "grad_norm": 2.5206348247401427, "learning_rate": 1.042869573110102e-05, "loss": 0.7875, "step": 3278 }, { "epoch": 0.5, "grad_norm": 2.7917041949509933, "learning_rate": 1.0423742696903047e-05, "loss": 0.9462, "step": 3279 }, { "epoch": 0.5, "grad_norm": 2.6746082283942822, "learning_rate": 1.0418789558561009e-05, "loss": 0.9436, "step": 3280 }, { "epoch": 0.5, "grad_norm": 2.5656588357619428, "learning_rate": 1.0413836317292237e-05, "loss": 0.9148, "step": 3281 }, { "epoch": 0.5, "grad_norm": 2.6118866244099777, "learning_rate": 1.0408882974314107e-05, "loss": 0.9189, "step": 3282 }, { "epoch": 0.5, "grad_norm": 2.6684671051310556, "learning_rate": 1.040392953084401e-05, "loss": 0.8033, "step": 3283 }, { "epoch": 0.5, "grad_norm": 2.501298470832651, "learning_rate": 1.0398975988099364e-05, "loss": 0.8298, "step": 3284 }, { "epoch": 0.5, "grad_norm": 2.5541503907350083, "learning_rate": 1.0394022347297607e-05, "loss": 0.8786, "step": 3285 }, { "epoch": 0.5, "grad_norm": 2.5594560871040892, "learning_rate": 1.038906860965621e-05, "loss": 0.8968, "step": 3286 }, { "epoch": 0.5, "grad_norm": 2.5151461823220593, "learning_rate": 1.038411477639266e-05, "loss": 0.9096, "step": 3287 }, { "epoch": 0.5, "grad_norm": 2.636485823466242, "learning_rate": 1.037916084872447e-05, "loss": 0.8021, "step": 3288 }, { "epoch": 0.5, "grad_norm": 2.72865389979779, "learning_rate": 1.0374206827869177e-05, "loss": 0.907, "step": 3289 }, { "epoch": 0.5, "grad_norm": 2.818026224530828, "learning_rate": 1.0369252715044343e-05, "loss": 0.8863, "step": 3290 }, { "epoch": 0.5, "grad_norm": 2.7254428761840406, "learning_rate": 1.0364298511467548e-05, "loss": 0.88, "step": 3291 }, { "epoch": 0.5, "grad_norm": 2.728241616850513, "learning_rate": 1.0359344218356393e-05, "loss": 0.8195, "step": 3292 }, { "epoch": 0.5, "grad_norm": 2.8484344836013813, "learning_rate": 1.0354389836928507e-05, "loss": 0.9687, "step": 3293 }, { "epoch": 0.5, "grad_norm": 3.096231522927287, "learning_rate": 1.0349435368401541e-05, "loss": 0.8959, "step": 3294 }, { "epoch": 0.5, "grad_norm": 2.6238474886319425, "learning_rate": 1.0344480813993163e-05, "loss": 0.8663, "step": 3295 }, { "epoch": 0.5, "grad_norm": 2.785385869115943, "learning_rate": 1.033952617492106e-05, "loss": 0.9265, "step": 3296 }, { "epoch": 0.5, "grad_norm": 2.4841009237509755, "learning_rate": 1.0334571452402943e-05, "loss": 0.8481, "step": 3297 }, { "epoch": 0.5, "grad_norm": 2.6596124256370652, "learning_rate": 1.032961664765655e-05, "loss": 0.8183, "step": 3298 }, { "epoch": 0.5, "grad_norm": 2.4437143863687227, "learning_rate": 1.0324661761899629e-05, "loss": 0.8173, "step": 3299 }, { "epoch": 0.51, "grad_norm": 3.29109457912766, "learning_rate": 1.0319706796349954e-05, "loss": 0.9644, "step": 3300 }, { "epoch": 0.51, "grad_norm": 2.797304793482052, "learning_rate": 1.0314751752225311e-05, "loss": 0.8918, "step": 3301 }, { "epoch": 0.51, "grad_norm": 2.7597018438887027, "learning_rate": 1.0309796630743518e-05, "loss": 0.819, "step": 3302 }, { "epoch": 0.51, "grad_norm": 2.3860829479408974, "learning_rate": 1.0304841433122399e-05, "loss": 0.9725, "step": 3303 }, { "epoch": 0.51, "grad_norm": 2.6324701321499036, "learning_rate": 1.0299886160579806e-05, "loss": 0.8367, "step": 3304 }, { "epoch": 0.51, "grad_norm": 2.501387059647868, "learning_rate": 1.0294930814333605e-05, "loss": 0.7974, "step": 3305 }, { "epoch": 0.51, "grad_norm": 2.525680580078367, "learning_rate": 1.0289975395601686e-05, "loss": 0.7649, "step": 3306 }, { "epoch": 0.51, "grad_norm": 2.686467602375458, "learning_rate": 1.0285019905601943e-05, "loss": 0.8886, "step": 3307 }, { "epoch": 0.51, "grad_norm": 3.1918554685564837, "learning_rate": 1.02800643455523e-05, "loss": 0.7807, "step": 3308 }, { "epoch": 0.51, "grad_norm": 2.636561913205046, "learning_rate": 1.0275108716670698e-05, "loss": 0.7962, "step": 3309 }, { "epoch": 0.51, "grad_norm": 2.719254863507505, "learning_rate": 1.0270153020175092e-05, "loss": 1.0061, "step": 3310 }, { "epoch": 0.51, "grad_norm": 2.801691612655321, "learning_rate": 1.0265197257283444e-05, "loss": 0.8736, "step": 3311 }, { "epoch": 0.51, "grad_norm": 2.590671703849368, "learning_rate": 1.0260241429213754e-05, "loss": 0.9129, "step": 3312 }, { "epoch": 0.51, "grad_norm": 2.8841881720350333, "learning_rate": 1.0255285537184016e-05, "loss": 0.8225, "step": 3313 }, { "epoch": 0.51, "grad_norm": 2.4984456373558555, "learning_rate": 1.0250329582412253e-05, "loss": 0.8641, "step": 3314 }, { "epoch": 0.51, "grad_norm": 2.596338224358061, "learning_rate": 1.0245373566116496e-05, "loss": 0.8932, "step": 3315 }, { "epoch": 0.51, "grad_norm": 2.7568003722125938, "learning_rate": 1.0240417489514802e-05, "loss": 0.8756, "step": 3316 }, { "epoch": 0.51, "grad_norm": 2.866798445477397, "learning_rate": 1.0235461353825234e-05, "loss": 0.8654, "step": 3317 }, { "epoch": 0.51, "grad_norm": 2.5433943971852275, "learning_rate": 1.0230505160265867e-05, "loss": 0.8851, "step": 3318 }, { "epoch": 0.51, "grad_norm": 2.9431924246520564, "learning_rate": 1.0225548910054794e-05, "loss": 0.8239, "step": 3319 }, { "epoch": 0.51, "grad_norm": 2.745923605151662, "learning_rate": 1.0220592604410127e-05, "loss": 0.8627, "step": 3320 }, { "epoch": 0.51, "grad_norm": 6.277014146147613, "learning_rate": 1.0215636244549985e-05, "loss": 0.9909, "step": 3321 }, { "epoch": 0.51, "grad_norm": 2.9053879625025125, "learning_rate": 1.02106798316925e-05, "loss": 0.8434, "step": 3322 }, { "epoch": 0.51, "grad_norm": 2.584237482121651, "learning_rate": 1.0205723367055821e-05, "loss": 0.842, "step": 3323 }, { "epoch": 0.51, "grad_norm": 2.8707112582385177, "learning_rate": 1.0200766851858112e-05, "loss": 0.935, "step": 3324 }, { "epoch": 0.51, "grad_norm": 2.447159641817951, "learning_rate": 1.0195810287317539e-05, "loss": 0.8213, "step": 3325 }, { "epoch": 0.51, "grad_norm": 3.651398982692408, "learning_rate": 1.0190853674652289e-05, "loss": 0.9345, "step": 3326 }, { "epoch": 0.51, "grad_norm": 3.411584643818777, "learning_rate": 1.0185897015080555e-05, "loss": 0.8791, "step": 3327 }, { "epoch": 0.51, "grad_norm": 2.728514157055375, "learning_rate": 1.0180940309820553e-05, "loss": 0.8527, "step": 3328 }, { "epoch": 0.51, "grad_norm": 2.3547298609445724, "learning_rate": 1.0175983560090496e-05, "loss": 0.8094, "step": 3329 }, { "epoch": 0.51, "grad_norm": 2.439730352013072, "learning_rate": 1.0171026767108617e-05, "loss": 0.7593, "step": 3330 }, { "epoch": 0.51, "grad_norm": 3.0534143692632862, "learning_rate": 1.0166069932093152e-05, "loss": 0.8036, "step": 3331 }, { "epoch": 0.51, "grad_norm": 2.683965074654162, "learning_rate": 1.016111305626236e-05, "loss": 0.8081, "step": 3332 }, { "epoch": 0.51, "grad_norm": 2.57505920815486, "learning_rate": 1.0156156140834492e-05, "loss": 0.753, "step": 3333 }, { "epoch": 0.51, "grad_norm": 4.283608619024065, "learning_rate": 1.0151199187027828e-05, "loss": 0.9922, "step": 3334 }, { "epoch": 0.51, "grad_norm": 2.7688976110505235, "learning_rate": 1.0146242196060646e-05, "loss": 0.8464, "step": 3335 }, { "epoch": 0.51, "grad_norm": 2.68577175813711, "learning_rate": 1.0141285169151229e-05, "loss": 0.7952, "step": 3336 }, { "epoch": 0.51, "grad_norm": 2.562544645505176, "learning_rate": 1.0136328107517881e-05, "loss": 0.9107, "step": 3337 }, { "epoch": 0.51, "grad_norm": 2.6052512511645904, "learning_rate": 1.0131371012378907e-05, "loss": 0.7702, "step": 3338 }, { "epoch": 0.51, "grad_norm": 3.0419214898098548, "learning_rate": 1.0126413884952626e-05, "loss": 0.7645, "step": 3339 }, { "epoch": 0.51, "grad_norm": 2.671771314701333, "learning_rate": 1.0121456726457357e-05, "loss": 0.8947, "step": 3340 }, { "epoch": 0.51, "grad_norm": 2.650762861555099, "learning_rate": 1.0116499538111428e-05, "loss": 0.8634, "step": 3341 }, { "epoch": 0.51, "grad_norm": 2.634411025008088, "learning_rate": 1.0111542321133182e-05, "loss": 0.8083, "step": 3342 }, { "epoch": 0.51, "grad_norm": 4.41310547661801, "learning_rate": 1.010658507674096e-05, "loss": 0.9767, "step": 3343 }, { "epoch": 0.51, "grad_norm": 2.568605596401079, "learning_rate": 1.0101627806153117e-05, "loss": 0.8827, "step": 3344 }, { "epoch": 0.51, "grad_norm": 2.7319944186757406, "learning_rate": 1.0096670510588009e-05, "loss": 0.8224, "step": 3345 }, { "epoch": 0.51, "grad_norm": 2.283551340145881, "learning_rate": 1.0091713191264001e-05, "loss": 0.7559, "step": 3346 }, { "epoch": 0.51, "grad_norm": 4.012626492995786, "learning_rate": 1.0086755849399464e-05, "loss": 0.986, "step": 3347 }, { "epoch": 0.51, "grad_norm": 2.686251388531931, "learning_rate": 1.008179848621277e-05, "loss": 0.8781, "step": 3348 }, { "epoch": 0.51, "grad_norm": 2.4624808391304565, "learning_rate": 1.0076841102922301e-05, "loss": 0.8124, "step": 3349 }, { "epoch": 0.51, "grad_norm": 2.6461052882685365, "learning_rate": 1.0071883700746448e-05, "loss": 0.8288, "step": 3350 }, { "epoch": 0.51, "grad_norm": 2.712048135899838, "learning_rate": 1.0066926280903598e-05, "loss": 0.8369, "step": 3351 }, { "epoch": 0.51, "grad_norm": 2.5896984185944394, "learning_rate": 1.0061968844612143e-05, "loss": 0.8532, "step": 3352 }, { "epoch": 0.51, "grad_norm": 2.8039264113702904, "learning_rate": 1.0057011393090481e-05, "loss": 0.8418, "step": 3353 }, { "epoch": 0.51, "grad_norm": 2.930467198579045, "learning_rate": 1.0052053927557022e-05, "loss": 0.8305, "step": 3354 }, { "epoch": 0.51, "grad_norm": 3.02410853561508, "learning_rate": 1.0047096449230164e-05, "loss": 0.8893, "step": 3355 }, { "epoch": 0.51, "grad_norm": 2.6144880865805216, "learning_rate": 1.0042138959328322e-05, "loss": 0.7572, "step": 3356 }, { "epoch": 0.51, "grad_norm": 2.7977719436946544, "learning_rate": 1.0037181459069905e-05, "loss": 0.8333, "step": 3357 }, { "epoch": 0.51, "grad_norm": 2.5403140670311344, "learning_rate": 1.003222394967333e-05, "loss": 0.7677, "step": 3358 }, { "epoch": 0.51, "grad_norm": 2.6608801644930726, "learning_rate": 1.0027266432357007e-05, "loss": 0.924, "step": 3359 }, { "epoch": 0.51, "grad_norm": 2.704651179422154, "learning_rate": 1.0022308908339365e-05, "loss": 0.7022, "step": 3360 }, { "epoch": 0.51, "grad_norm": 2.679460239629454, "learning_rate": 1.0017351378838817e-05, "loss": 0.8915, "step": 3361 }, { "epoch": 0.51, "grad_norm": 2.498793390857166, "learning_rate": 1.0012393845073787e-05, "loss": 0.7512, "step": 3362 }, { "epoch": 0.51, "grad_norm": 2.6430015156102837, "learning_rate": 1.0007436308262696e-05, "loss": 0.8217, "step": 3363 }, { "epoch": 0.51, "grad_norm": 2.735772542706487, "learning_rate": 1.000247876962397e-05, "loss": 0.8334, "step": 3364 }, { "epoch": 0.52, "grad_norm": 2.515510191667527, "learning_rate": 9.997521230376032e-06, "loss": 0.8312, "step": 3365 }, { "epoch": 0.52, "grad_norm": 2.8658668233468925, "learning_rate": 9.992563691737304e-06, "loss": 0.9067, "step": 3366 }, { "epoch": 0.52, "grad_norm": 2.837582352635219, "learning_rate": 9.987606154926214e-06, "loss": 0.7849, "step": 3367 }, { "epoch": 0.52, "grad_norm": 2.7723264928964144, "learning_rate": 9.982648621161188e-06, "loss": 0.8556, "step": 3368 }, { "epoch": 0.52, "grad_norm": 2.7701920524217827, "learning_rate": 9.977691091660637e-06, "loss": 0.7989, "step": 3369 }, { "epoch": 0.52, "grad_norm": 2.7434900183215976, "learning_rate": 9.972733567642994e-06, "loss": 0.8515, "step": 3370 }, { "epoch": 0.52, "grad_norm": 2.8262306070225014, "learning_rate": 9.967776050326675e-06, "loss": 0.768, "step": 3371 }, { "epoch": 0.52, "grad_norm": 2.630803167653262, "learning_rate": 9.962818540930095e-06, "loss": 0.8, "step": 3372 }, { "epoch": 0.52, "grad_norm": 2.7316271034593185, "learning_rate": 9.95786104067168e-06, "loss": 0.8565, "step": 3373 }, { "epoch": 0.52, "grad_norm": 2.5927251961593463, "learning_rate": 9.952903550769837e-06, "loss": 0.843, "step": 3374 }, { "epoch": 0.52, "grad_norm": 2.682418900705461, "learning_rate": 9.947946072442982e-06, "loss": 0.9163, "step": 3375 }, { "epoch": 0.52, "grad_norm": 2.4891777647767173, "learning_rate": 9.942988606909522e-06, "loss": 0.7926, "step": 3376 }, { "epoch": 0.52, "grad_norm": 2.7278232479343947, "learning_rate": 9.938031155387859e-06, "loss": 0.8517, "step": 3377 }, { "epoch": 0.52, "grad_norm": 2.6922445372108488, "learning_rate": 9.933073719096406e-06, "loss": 0.916, "step": 3378 }, { "epoch": 0.52, "grad_norm": 2.562382246736542, "learning_rate": 9.928116299253553e-06, "loss": 0.8839, "step": 3379 }, { "epoch": 0.52, "grad_norm": 2.4537029707439313, "learning_rate": 9.923158897077699e-06, "loss": 0.8054, "step": 3380 }, { "epoch": 0.52, "grad_norm": 3.3236126145506613, "learning_rate": 9.918201513787233e-06, "loss": 0.7271, "step": 3381 }, { "epoch": 0.52, "grad_norm": 3.055213047874377, "learning_rate": 9.913244150600541e-06, "loss": 0.9046, "step": 3382 }, { "epoch": 0.52, "grad_norm": 2.553739553514491, "learning_rate": 9.908286808735999e-06, "loss": 0.8065, "step": 3383 }, { "epoch": 0.52, "grad_norm": 2.4245662109666766, "learning_rate": 9.903329489411993e-06, "loss": 0.8129, "step": 3384 }, { "epoch": 0.52, "grad_norm": 2.660906783044096, "learning_rate": 9.898372193846887e-06, "loss": 0.8541, "step": 3385 }, { "epoch": 0.52, "grad_norm": 2.578729689514895, "learning_rate": 9.893414923259042e-06, "loss": 0.8421, "step": 3386 }, { "epoch": 0.52, "grad_norm": 2.5078486683767416, "learning_rate": 9.888457678866823e-06, "loss": 0.8087, "step": 3387 }, { "epoch": 0.52, "grad_norm": 2.521637554958834, "learning_rate": 9.883500461888573e-06, "loss": 0.8557, "step": 3388 }, { "epoch": 0.52, "grad_norm": 2.6795766617286967, "learning_rate": 9.878543273542648e-06, "loss": 0.7181, "step": 3389 }, { "epoch": 0.52, "grad_norm": 2.498689771309547, "learning_rate": 9.873586115047377e-06, "loss": 0.8819, "step": 3390 }, { "epoch": 0.52, "grad_norm": 2.577184398412776, "learning_rate": 9.868628987621095e-06, "loss": 0.8566, "step": 3391 }, { "epoch": 0.52, "grad_norm": 2.793311417654129, "learning_rate": 9.863671892482122e-06, "loss": 0.8169, "step": 3392 }, { "epoch": 0.52, "grad_norm": 2.620997389101896, "learning_rate": 9.858714830848776e-06, "loss": 0.9171, "step": 3393 }, { "epoch": 0.52, "grad_norm": 2.778207542389812, "learning_rate": 9.853757803939358e-06, "loss": 0.8429, "step": 3394 }, { "epoch": 0.52, "grad_norm": 2.9562677095301786, "learning_rate": 9.848800812972175e-06, "loss": 0.8696, "step": 3395 }, { "epoch": 0.52, "grad_norm": 2.599642093006926, "learning_rate": 9.84384385916551e-06, "loss": 0.7961, "step": 3396 }, { "epoch": 0.52, "grad_norm": 3.0071841120945737, "learning_rate": 9.838886943737645e-06, "loss": 0.8398, "step": 3397 }, { "epoch": 0.52, "grad_norm": 2.696431412561743, "learning_rate": 9.833930067906851e-06, "loss": 0.8396, "step": 3398 }, { "epoch": 0.52, "grad_norm": 2.430701029582184, "learning_rate": 9.828973232891385e-06, "loss": 0.7366, "step": 3399 }, { "epoch": 0.52, "grad_norm": 2.8167533231043294, "learning_rate": 9.824016439909505e-06, "loss": 0.8484, "step": 3400 }, { "epoch": 0.52, "grad_norm": 2.687160062145975, "learning_rate": 9.81905969017945e-06, "loss": 0.8009, "step": 3401 }, { "epoch": 0.52, "grad_norm": 2.4484114257757734, "learning_rate": 9.814102984919445e-06, "loss": 0.7761, "step": 3402 }, { "epoch": 0.52, "grad_norm": 2.6496492282970485, "learning_rate": 9.809146325347716e-06, "loss": 0.7687, "step": 3403 }, { "epoch": 0.52, "grad_norm": 4.864677266471541, "learning_rate": 9.804189712682466e-06, "loss": 1.023, "step": 3404 }, { "epoch": 0.52, "grad_norm": 2.7989202592935984, "learning_rate": 9.79923314814189e-06, "loss": 0.8775, "step": 3405 }, { "epoch": 0.52, "grad_norm": 2.586169913513614, "learning_rate": 9.79427663294418e-06, "loss": 0.7965, "step": 3406 }, { "epoch": 0.52, "grad_norm": 2.8333347061539187, "learning_rate": 9.7893201683075e-06, "loss": 0.9712, "step": 3407 }, { "epoch": 0.52, "grad_norm": 2.719996695889003, "learning_rate": 9.784363755450018e-06, "loss": 0.9177, "step": 3408 }, { "epoch": 0.52, "grad_norm": 2.4773553077282444, "learning_rate": 9.779407395589876e-06, "loss": 0.7608, "step": 3409 }, { "epoch": 0.52, "grad_norm": 2.5475236772585563, "learning_rate": 9.774451089945206e-06, "loss": 0.7846, "step": 3410 }, { "epoch": 0.52, "grad_norm": 3.0188556226064445, "learning_rate": 9.769494839734136e-06, "loss": 0.8426, "step": 3411 }, { "epoch": 0.52, "grad_norm": 4.254714206692356, "learning_rate": 9.764538646174771e-06, "loss": 0.9804, "step": 3412 }, { "epoch": 0.52, "grad_norm": 2.827330030766981, "learning_rate": 9.759582510485198e-06, "loss": 0.857, "step": 3413 }, { "epoch": 0.52, "grad_norm": 2.4734544365088404, "learning_rate": 9.754626433883506e-06, "loss": 0.8474, "step": 3414 }, { "epoch": 0.52, "grad_norm": 2.704908642099613, "learning_rate": 9.749670417587753e-06, "loss": 0.9602, "step": 3415 }, { "epoch": 0.52, "grad_norm": 2.6888669635440077, "learning_rate": 9.744714462815987e-06, "loss": 0.8605, "step": 3416 }, { "epoch": 0.52, "grad_norm": 2.709281617887562, "learning_rate": 9.739758570786251e-06, "loss": 0.8596, "step": 3417 }, { "epoch": 0.52, "grad_norm": 2.7199414147145395, "learning_rate": 9.734802742716556e-06, "loss": 0.7955, "step": 3418 }, { "epoch": 0.52, "grad_norm": 2.509186081480435, "learning_rate": 9.729846979824913e-06, "loss": 0.8612, "step": 3419 }, { "epoch": 0.52, "grad_norm": 2.9166514156562298, "learning_rate": 9.724891283329305e-06, "loss": 0.9359, "step": 3420 }, { "epoch": 0.52, "grad_norm": 2.496906473025068, "learning_rate": 9.7199356544477e-06, "loss": 0.7673, "step": 3421 }, { "epoch": 0.52, "grad_norm": 2.5023038128266286, "learning_rate": 9.714980094398059e-06, "loss": 0.8332, "step": 3422 }, { "epoch": 0.52, "grad_norm": 2.75657816335714, "learning_rate": 9.710024604398317e-06, "loss": 0.8968, "step": 3423 }, { "epoch": 0.52, "grad_norm": 2.5561294321392594, "learning_rate": 9.705069185666396e-06, "loss": 0.7787, "step": 3424 }, { "epoch": 0.52, "grad_norm": 3.1037707547457294, "learning_rate": 9.700113839420197e-06, "loss": 0.8882, "step": 3425 }, { "epoch": 0.52, "grad_norm": 2.4952606751734274, "learning_rate": 9.695158566877606e-06, "loss": 0.9268, "step": 3426 }, { "epoch": 0.52, "grad_norm": 6.026747964805295, "learning_rate": 9.690203369256486e-06, "loss": 0.9835, "step": 3427 }, { "epoch": 0.52, "grad_norm": 2.5480816986475294, "learning_rate": 9.685248247774692e-06, "loss": 0.7946, "step": 3428 }, { "epoch": 0.52, "grad_norm": 2.604296116527149, "learning_rate": 9.68029320365005e-06, "loss": 0.8441, "step": 3429 }, { "epoch": 0.53, "grad_norm": 2.510085849702647, "learning_rate": 9.675338238100375e-06, "loss": 0.7499, "step": 3430 }, { "epoch": 0.53, "grad_norm": 2.9373262344980984, "learning_rate": 9.670383352343454e-06, "loss": 0.9004, "step": 3431 }, { "epoch": 0.53, "grad_norm": 2.6469345532827955, "learning_rate": 9.665428547597057e-06, "loss": 0.8273, "step": 3432 }, { "epoch": 0.53, "grad_norm": 2.761930481958474, "learning_rate": 9.660473825078944e-06, "loss": 0.819, "step": 3433 }, { "epoch": 0.53, "grad_norm": 2.4498718953719414, "learning_rate": 9.655519186006842e-06, "loss": 0.7963, "step": 3434 }, { "epoch": 0.53, "grad_norm": 2.786495910453177, "learning_rate": 9.65056463159846e-06, "loss": 0.913, "step": 3435 }, { "epoch": 0.53, "grad_norm": 2.53808454970735, "learning_rate": 9.645610163071495e-06, "loss": 0.82, "step": 3436 }, { "epoch": 0.53, "grad_norm": 2.5015219428230586, "learning_rate": 9.640655781643612e-06, "loss": 0.7981, "step": 3437 }, { "epoch": 0.53, "grad_norm": 2.4680609160616767, "learning_rate": 9.635701488532455e-06, "loss": 0.7857, "step": 3438 }, { "epoch": 0.53, "grad_norm": 3.990846500890265, "learning_rate": 9.63074728495566e-06, "loss": 0.9372, "step": 3439 }, { "epoch": 0.53, "grad_norm": 2.9446715414792775, "learning_rate": 9.625793172130825e-06, "loss": 0.7747, "step": 3440 }, { "epoch": 0.53, "grad_norm": 2.483984216486534, "learning_rate": 9.620839151275534e-06, "loss": 0.8843, "step": 3441 }, { "epoch": 0.53, "grad_norm": 2.7169965735793693, "learning_rate": 9.615885223607345e-06, "loss": 0.8942, "step": 3442 }, { "epoch": 0.53, "grad_norm": 2.544915450697256, "learning_rate": 9.610931390343792e-06, "loss": 0.8571, "step": 3443 }, { "epoch": 0.53, "grad_norm": 2.6189570146790913, "learning_rate": 9.605977652702394e-06, "loss": 0.8119, "step": 3444 }, { "epoch": 0.53, "grad_norm": 2.771286909908849, "learning_rate": 9.60102401190064e-06, "loss": 0.8558, "step": 3445 }, { "epoch": 0.53, "grad_norm": 2.627183298351705, "learning_rate": 9.596070469155992e-06, "loss": 0.8693, "step": 3446 }, { "epoch": 0.53, "grad_norm": 2.602025072939709, "learning_rate": 9.591117025685897e-06, "loss": 0.8599, "step": 3447 }, { "epoch": 0.53, "grad_norm": 2.7213601358474806, "learning_rate": 9.586163682707768e-06, "loss": 0.754, "step": 3448 }, { "epoch": 0.53, "grad_norm": 2.7410310874423023, "learning_rate": 9.581210441438994e-06, "loss": 0.8447, "step": 3449 }, { "epoch": 0.53, "grad_norm": 2.44157798300347, "learning_rate": 9.576257303096955e-06, "loss": 0.8248, "step": 3450 }, { "epoch": 0.53, "grad_norm": 2.6606986031824222, "learning_rate": 9.571304268898983e-06, "loss": 0.99, "step": 3451 }, { "epoch": 0.53, "grad_norm": 2.703515781558748, "learning_rate": 9.566351340062401e-06, "loss": 0.7711, "step": 3452 }, { "epoch": 0.53, "grad_norm": 2.727709716865192, "learning_rate": 9.561398517804498e-06, "loss": 0.7937, "step": 3453 }, { "epoch": 0.53, "grad_norm": 2.634863441280792, "learning_rate": 9.556445803342532e-06, "loss": 0.836, "step": 3454 }, { "epoch": 0.53, "grad_norm": 6.148624164260505, "learning_rate": 9.551493197893755e-06, "loss": 0.9873, "step": 3455 }, { "epoch": 0.53, "grad_norm": 2.8217903157217106, "learning_rate": 9.546540702675369e-06, "loss": 0.841, "step": 3456 }, { "epoch": 0.53, "grad_norm": 2.667756818733098, "learning_rate": 9.541588318904558e-06, "loss": 0.8399, "step": 3457 }, { "epoch": 0.53, "grad_norm": 6.8915142027475165, "learning_rate": 9.536636047798484e-06, "loss": 0.8674, "step": 3458 }, { "epoch": 0.53, "grad_norm": 2.5080794197051826, "learning_rate": 9.531683890574275e-06, "loss": 0.7672, "step": 3459 }, { "epoch": 0.53, "grad_norm": 2.8576179806465003, "learning_rate": 9.526731848449025e-06, "loss": 0.8458, "step": 3460 }, { "epoch": 0.53, "grad_norm": 2.7890894029086764, "learning_rate": 9.52177992263982e-06, "loss": 0.8804, "step": 3461 }, { "epoch": 0.53, "grad_norm": 2.6054412774153697, "learning_rate": 9.516828114363695e-06, "loss": 0.7928, "step": 3462 }, { "epoch": 0.53, "grad_norm": 2.510783295864765, "learning_rate": 9.51187642483767e-06, "loss": 0.9286, "step": 3463 }, { "epoch": 0.53, "grad_norm": 2.600019031689135, "learning_rate": 9.50692485527873e-06, "loss": 0.7392, "step": 3464 }, { "epoch": 0.53, "grad_norm": 2.6241823466708336, "learning_rate": 9.501973406903827e-06, "loss": 0.8606, "step": 3465 }, { "epoch": 0.53, "grad_norm": 9.190637646959537, "learning_rate": 9.497022080929898e-06, "loss": 0.9424, "step": 3466 }, { "epoch": 0.53, "grad_norm": 2.564623714774583, "learning_rate": 9.492070878573835e-06, "loss": 0.8429, "step": 3467 }, { "epoch": 0.53, "grad_norm": 2.668379840486614, "learning_rate": 9.487119801052502e-06, "loss": 0.8477, "step": 3468 }, { "epoch": 0.53, "grad_norm": 2.569726689880901, "learning_rate": 9.48216884958274e-06, "loss": 0.7003, "step": 3469 }, { "epoch": 0.53, "grad_norm": 2.5774518981436936, "learning_rate": 9.477218025381351e-06, "loss": 0.7733, "step": 3470 }, { "epoch": 0.53, "grad_norm": 2.8184807477848963, "learning_rate": 9.472267329665107e-06, "loss": 0.7704, "step": 3471 }, { "epoch": 0.53, "grad_norm": 2.489332514227551, "learning_rate": 9.467316763650755e-06, "loss": 0.8022, "step": 3472 }, { "epoch": 0.53, "grad_norm": 2.741348982843612, "learning_rate": 9.462366328555e-06, "loss": 0.8633, "step": 3473 }, { "epoch": 0.53, "grad_norm": 2.8905647730408908, "learning_rate": 9.457416025594528e-06, "loss": 0.9357, "step": 3474 }, { "epoch": 0.53, "grad_norm": 2.951366616887644, "learning_rate": 9.452465855985978e-06, "loss": 0.8145, "step": 3475 }, { "epoch": 0.53, "grad_norm": 3.4285873693290116, "learning_rate": 9.447515820945959e-06, "loss": 0.7857, "step": 3476 }, { "epoch": 0.53, "grad_norm": 2.815876716660041, "learning_rate": 9.442565921691062e-06, "loss": 0.7452, "step": 3477 }, { "epoch": 0.53, "grad_norm": 2.5491022711639046, "learning_rate": 9.437616159437828e-06, "loss": 0.8215, "step": 3478 }, { "epoch": 0.53, "grad_norm": 2.5599892020044384, "learning_rate": 9.432666535402764e-06, "loss": 0.7299, "step": 3479 }, { "epoch": 0.53, "grad_norm": 2.7040859098807526, "learning_rate": 9.427717050802359e-06, "loss": 0.811, "step": 3480 }, { "epoch": 0.53, "grad_norm": 2.9344148037173694, "learning_rate": 9.42276770685305e-06, "loss": 0.9246, "step": 3481 }, { "epoch": 0.53, "grad_norm": 5.906786834299944, "learning_rate": 9.41781850477125e-06, "loss": 0.9404, "step": 3482 }, { "epoch": 0.53, "grad_norm": 2.566431527013668, "learning_rate": 9.412869445773338e-06, "loss": 0.8971, "step": 3483 }, { "epoch": 0.53, "grad_norm": 2.8506725057017905, "learning_rate": 9.407920531075641e-06, "loss": 0.8048, "step": 3484 }, { "epoch": 0.53, "grad_norm": 4.215499929654718, "learning_rate": 9.40297176189448e-06, "loss": 0.9818, "step": 3485 }, { "epoch": 0.53, "grad_norm": 2.8119702972456717, "learning_rate": 9.398023139446113e-06, "loss": 0.8954, "step": 3486 }, { "epoch": 0.53, "grad_norm": 2.8743448207774582, "learning_rate": 9.393074664946773e-06, "loss": 0.9275, "step": 3487 }, { "epoch": 0.53, "grad_norm": 2.6264244278472773, "learning_rate": 9.388126339612661e-06, "loss": 0.8055, "step": 3488 }, { "epoch": 0.53, "grad_norm": 2.786429666020273, "learning_rate": 9.383178164659935e-06, "loss": 0.7912, "step": 3489 }, { "epoch": 0.53, "grad_norm": 2.96515087237739, "learning_rate": 9.378230141304711e-06, "loss": 0.9173, "step": 3490 }, { "epoch": 0.53, "grad_norm": 2.685269953318423, "learning_rate": 9.373282270763087e-06, "loss": 0.8394, "step": 3491 }, { "epoch": 0.53, "grad_norm": 2.8070326631129587, "learning_rate": 9.368334554251099e-06, "loss": 0.9085, "step": 3492 }, { "epoch": 0.53, "grad_norm": 2.79038954283755, "learning_rate": 9.363386992984765e-06, "loss": 0.8801, "step": 3493 }, { "epoch": 0.53, "grad_norm": 2.9430808628170833, "learning_rate": 9.358439588180053e-06, "loss": 0.8405, "step": 3494 }, { "epoch": 0.53, "grad_norm": 2.6887706037474057, "learning_rate": 9.353492341052894e-06, "loss": 0.7708, "step": 3495 }, { "epoch": 0.54, "grad_norm": 2.506287273436747, "learning_rate": 9.34854525281919e-06, "loss": 0.8898, "step": 3496 }, { "epoch": 0.54, "grad_norm": 2.798243828822919, "learning_rate": 9.34359832469479e-06, "loss": 0.8658, "step": 3497 }, { "epoch": 0.54, "grad_norm": 2.8401541321532227, "learning_rate": 9.338651557895513e-06, "loss": 0.8637, "step": 3498 }, { "epoch": 0.54, "grad_norm": 2.4414548026642198, "learning_rate": 9.333704953637135e-06, "loss": 0.8658, "step": 3499 }, { "epoch": 0.54, "grad_norm": 2.6143105878878985, "learning_rate": 9.328758513135393e-06, "loss": 0.7415, "step": 3500 }, { "epoch": 0.54, "grad_norm": 2.509734906422458, "learning_rate": 9.323812237605977e-06, "loss": 0.8148, "step": 3501 }, { "epoch": 0.54, "grad_norm": 2.55941722209386, "learning_rate": 9.318866128264556e-06, "loss": 0.7698, "step": 3502 }, { "epoch": 0.54, "grad_norm": 2.6536660992690133, "learning_rate": 9.313920186326734e-06, "loss": 0.7173, "step": 3503 }, { "epoch": 0.54, "grad_norm": 2.5321252514402866, "learning_rate": 9.30897441300809e-06, "loss": 0.8794, "step": 3504 }, { "epoch": 0.54, "grad_norm": 2.9748037974758677, "learning_rate": 9.304028809524154e-06, "loss": 0.9203, "step": 3505 }, { "epoch": 0.54, "grad_norm": 2.852371008150815, "learning_rate": 9.299083377090415e-06, "loss": 0.811, "step": 3506 }, { "epoch": 0.54, "grad_norm": 2.8329209503983774, "learning_rate": 9.294138116922328e-06, "loss": 0.8725, "step": 3507 }, { "epoch": 0.54, "grad_norm": 2.556917873688633, "learning_rate": 9.289193030235293e-06, "loss": 0.8832, "step": 3508 }, { "epoch": 0.54, "grad_norm": 2.7490077769808483, "learning_rate": 9.284248118244676e-06, "loss": 0.8685, "step": 3509 }, { "epoch": 0.54, "grad_norm": 2.821699231197627, "learning_rate": 9.2793033821658e-06, "loss": 0.8822, "step": 3510 }, { "epoch": 0.54, "grad_norm": 2.62568208135973, "learning_rate": 9.274358823213938e-06, "loss": 0.8229, "step": 3511 }, { "epoch": 0.54, "grad_norm": 3.3221816368186157, "learning_rate": 9.269414442604324e-06, "loss": 0.9029, "step": 3512 }, { "epoch": 0.54, "grad_norm": 2.5729726401385427, "learning_rate": 9.264470241552152e-06, "loss": 0.7594, "step": 3513 }, { "epoch": 0.54, "grad_norm": 2.613962094609536, "learning_rate": 9.259526221272564e-06, "loss": 0.8595, "step": 3514 }, { "epoch": 0.54, "grad_norm": 2.7485831111727084, "learning_rate": 9.254582382980667e-06, "loss": 0.7865, "step": 3515 }, { "epoch": 0.54, "grad_norm": 2.622037998037098, "learning_rate": 9.249638727891513e-06, "loss": 0.9312, "step": 3516 }, { "epoch": 0.54, "grad_norm": 2.4585507034727847, "learning_rate": 9.24469525722011e-06, "loss": 0.7725, "step": 3517 }, { "epoch": 0.54, "grad_norm": 2.491230089556136, "learning_rate": 9.239751972181435e-06, "loss": 0.8587, "step": 3518 }, { "epoch": 0.54, "grad_norm": 2.6129819112402437, "learning_rate": 9.234808873990405e-06, "loss": 0.8502, "step": 3519 }, { "epoch": 0.54, "grad_norm": 2.5048705499962733, "learning_rate": 9.229865963861888e-06, "loss": 0.7968, "step": 3520 }, { "epoch": 0.54, "grad_norm": 2.6486800302992606, "learning_rate": 9.224923243010722e-06, "loss": 0.8275, "step": 3521 }, { "epoch": 0.54, "grad_norm": 2.7180029217975923, "learning_rate": 9.219980712651684e-06, "loss": 0.7334, "step": 3522 }, { "epoch": 0.54, "grad_norm": 2.600823388647472, "learning_rate": 9.215038373999507e-06, "loss": 0.8259, "step": 3523 }, { "epoch": 0.54, "grad_norm": 2.842235272446629, "learning_rate": 9.210096228268885e-06, "loss": 0.8671, "step": 3524 }, { "epoch": 0.54, "grad_norm": 4.692287898296294, "learning_rate": 9.205154276674456e-06, "loss": 0.9264, "step": 3525 }, { "epoch": 0.54, "grad_norm": 2.682372073940632, "learning_rate": 9.200212520430814e-06, "loss": 0.8054, "step": 3526 }, { "epoch": 0.54, "grad_norm": 4.928798676688956, "learning_rate": 9.195270960752505e-06, "loss": 0.998, "step": 3527 }, { "epoch": 0.54, "grad_norm": 2.5156680957421016, "learning_rate": 9.19032959885402e-06, "loss": 0.7873, "step": 3528 }, { "epoch": 0.54, "grad_norm": 2.643942704387546, "learning_rate": 9.185388435949815e-06, "loss": 0.7548, "step": 3529 }, { "epoch": 0.54, "grad_norm": 2.65169439065476, "learning_rate": 9.180447473254289e-06, "loss": 0.8838, "step": 3530 }, { "epoch": 0.54, "grad_norm": 2.6752622485002098, "learning_rate": 9.175506711981782e-06, "loss": 0.8896, "step": 3531 }, { "epoch": 0.54, "grad_norm": 2.7589924845247444, "learning_rate": 9.170566153346606e-06, "loss": 0.8725, "step": 3532 }, { "epoch": 0.54, "grad_norm": 2.506078803726627, "learning_rate": 9.165625798563007e-06, "loss": 0.7382, "step": 3533 }, { "epoch": 0.54, "grad_norm": 2.53520520564263, "learning_rate": 9.160685648845182e-06, "loss": 0.8571, "step": 3534 }, { "epoch": 0.54, "grad_norm": 2.607123422142902, "learning_rate": 9.155745705407288e-06, "loss": 0.9119, "step": 3535 }, { "epoch": 0.54, "grad_norm": 2.5822010527861434, "learning_rate": 9.15080596946342e-06, "loss": 0.7983, "step": 3536 }, { "epoch": 0.54, "grad_norm": 2.5685810562247813, "learning_rate": 9.145866442227632e-06, "loss": 0.8518, "step": 3537 }, { "epoch": 0.54, "grad_norm": 2.535089245635708, "learning_rate": 9.140927124913915e-06, "loss": 0.6993, "step": 3538 }, { "epoch": 0.54, "grad_norm": 2.7257069900543813, "learning_rate": 9.135988018736214e-06, "loss": 0.8749, "step": 3539 }, { "epoch": 0.54, "grad_norm": 2.711857668052274, "learning_rate": 9.13104912490843e-06, "loss": 0.833, "step": 3540 }, { "epoch": 0.54, "grad_norm": 2.5288441724978767, "learning_rate": 9.1261104446444e-06, "loss": 0.806, "step": 3541 }, { "epoch": 0.54, "grad_norm": 2.9774721456587407, "learning_rate": 9.121171979157912e-06, "loss": 0.9085, "step": 3542 }, { "epoch": 0.54, "grad_norm": 2.5387614093810194, "learning_rate": 9.116233729662705e-06, "loss": 0.784, "step": 3543 }, { "epoch": 0.54, "grad_norm": 2.9467484789677543, "learning_rate": 9.111295697372463e-06, "loss": 0.9633, "step": 3544 }, { "epoch": 0.54, "grad_norm": 2.9757437755654497, "learning_rate": 9.106357883500808e-06, "loss": 0.8032, "step": 3545 }, { "epoch": 0.54, "grad_norm": 2.5998054749396022, "learning_rate": 9.101420289261327e-06, "loss": 0.8322, "step": 3546 }, { "epoch": 0.54, "grad_norm": 2.7217799386307995, "learning_rate": 9.096482915867535e-06, "loss": 0.9016, "step": 3547 }, { "epoch": 0.54, "grad_norm": 2.7475901842215342, "learning_rate": 9.091545764532905e-06, "loss": 0.8422, "step": 3548 }, { "epoch": 0.54, "grad_norm": 2.904223424761576, "learning_rate": 9.086608836470847e-06, "loss": 0.8544, "step": 3549 }, { "epoch": 0.54, "grad_norm": 2.8385256278455, "learning_rate": 9.081672132894716e-06, "loss": 0.8998, "step": 3550 }, { "epoch": 0.54, "grad_norm": 6.538160224033064, "learning_rate": 9.076735655017822e-06, "loss": 1.0688, "step": 3551 }, { "epoch": 0.54, "grad_norm": 2.6527168081088623, "learning_rate": 9.071799404053412e-06, "loss": 0.8223, "step": 3552 }, { "epoch": 0.54, "grad_norm": 2.9676891853864023, "learning_rate": 9.066863381214672e-06, "loss": 0.8399, "step": 3553 }, { "epoch": 0.54, "grad_norm": 2.685845974353761, "learning_rate": 9.061927587714747e-06, "loss": 0.9156, "step": 3554 }, { "epoch": 0.54, "grad_norm": 2.724001222133191, "learning_rate": 9.056992024766706e-06, "loss": 0.8723, "step": 3555 }, { "epoch": 0.54, "grad_norm": 2.4897993153457327, "learning_rate": 9.05205669358358e-06, "loss": 0.7244, "step": 3556 }, { "epoch": 0.54, "grad_norm": 2.6163874330228825, "learning_rate": 9.047121595378335e-06, "loss": 0.9575, "step": 3557 }, { "epoch": 0.54, "grad_norm": 2.471137288002054, "learning_rate": 9.042186731363876e-06, "loss": 0.8606, "step": 3558 }, { "epoch": 0.54, "grad_norm": 2.8295381734389005, "learning_rate": 9.037252102753056e-06, "loss": 0.866, "step": 3559 }, { "epoch": 0.54, "grad_norm": 2.5518594589083032, "learning_rate": 9.032317710758668e-06, "loss": 0.7769, "step": 3560 }, { "epoch": 0.55, "grad_norm": 2.6965298501742114, "learning_rate": 9.027383556593443e-06, "loss": 0.818, "step": 3561 }, { "epoch": 0.55, "grad_norm": 2.536115670826026, "learning_rate": 9.022449641470066e-06, "loss": 0.7124, "step": 3562 }, { "epoch": 0.55, "grad_norm": 2.588450203478518, "learning_rate": 9.017515966601152e-06, "loss": 0.972, "step": 3563 }, { "epoch": 0.55, "grad_norm": 3.7909023955013934, "learning_rate": 9.012582533199254e-06, "loss": 0.9889, "step": 3564 }, { "epoch": 0.55, "grad_norm": 2.6482008686874416, "learning_rate": 9.00764934247688e-06, "loss": 0.8659, "step": 3565 }, { "epoch": 0.55, "grad_norm": 2.8874019494064798, "learning_rate": 9.002716395646462e-06, "loss": 0.8602, "step": 3566 }, { "epoch": 0.55, "grad_norm": 2.6378105278298722, "learning_rate": 8.997783693920387e-06, "loss": 0.8591, "step": 3567 }, { "epoch": 0.55, "grad_norm": 2.6620268667766327, "learning_rate": 8.992851238510972e-06, "loss": 0.8449, "step": 3568 }, { "epoch": 0.55, "grad_norm": 2.633541484655814, "learning_rate": 8.987919030630474e-06, "loss": 0.8495, "step": 3569 }, { "epoch": 0.55, "grad_norm": 2.397040476582493, "learning_rate": 8.982987071491097e-06, "loss": 0.8061, "step": 3570 }, { "epoch": 0.55, "grad_norm": 2.5219829545734362, "learning_rate": 8.978055362304974e-06, "loss": 0.8393, "step": 3571 }, { "epoch": 0.55, "grad_norm": 2.586610187878668, "learning_rate": 8.973123904284175e-06, "loss": 0.7636, "step": 3572 }, { "epoch": 0.55, "grad_norm": 2.7685865321112684, "learning_rate": 8.968192698640728e-06, "loss": 0.8546, "step": 3573 }, { "epoch": 0.55, "grad_norm": 2.8152562626440734, "learning_rate": 8.963261746586576e-06, "loss": 0.8472, "step": 3574 }, { "epoch": 0.55, "grad_norm": 2.470179452309494, "learning_rate": 8.958331049333608e-06, "loss": 0.8264, "step": 3575 }, { "epoch": 0.55, "grad_norm": 2.5236760418062016, "learning_rate": 8.953400608093655e-06, "loss": 0.8112, "step": 3576 }, { "epoch": 0.55, "grad_norm": 3.1871526471365152, "learning_rate": 8.948470424078477e-06, "loss": 0.8764, "step": 3577 }, { "epoch": 0.55, "grad_norm": 4.390788855245002, "learning_rate": 8.94354049849978e-06, "loss": 1.0057, "step": 3578 }, { "epoch": 0.55, "grad_norm": 2.7210093186803923, "learning_rate": 8.9386108325692e-06, "loss": 0.9305, "step": 3579 }, { "epoch": 0.55, "grad_norm": 2.9855834421533496, "learning_rate": 8.933681427498308e-06, "loss": 0.9511, "step": 3580 }, { "epoch": 0.55, "grad_norm": 2.7666441197229914, "learning_rate": 8.928752284498616e-06, "loss": 0.8362, "step": 3581 }, { "epoch": 0.55, "grad_norm": 2.751965748661896, "learning_rate": 8.923823404781569e-06, "loss": 0.8808, "step": 3582 }, { "epoch": 0.55, "grad_norm": 2.576050038453813, "learning_rate": 8.91889478955854e-06, "loss": 0.7936, "step": 3583 }, { "epoch": 0.55, "grad_norm": 2.8137578824285945, "learning_rate": 8.913966440040858e-06, "loss": 0.8241, "step": 3584 }, { "epoch": 0.55, "grad_norm": 2.6778875907500406, "learning_rate": 8.909038357439767e-06, "loss": 0.7843, "step": 3585 }, { "epoch": 0.55, "grad_norm": 2.668733297014157, "learning_rate": 8.904110542966446e-06, "loss": 0.7914, "step": 3586 }, { "epoch": 0.55, "grad_norm": 2.6923499450062893, "learning_rate": 8.89918299783202e-06, "loss": 0.8458, "step": 3587 }, { "epoch": 0.55, "grad_norm": 2.632803767921514, "learning_rate": 8.894255723247536e-06, "loss": 0.8983, "step": 3588 }, { "epoch": 0.55, "grad_norm": 2.5401591385553277, "learning_rate": 8.88932872042399e-06, "loss": 0.7862, "step": 3589 }, { "epoch": 0.55, "grad_norm": 2.7332741048875278, "learning_rate": 8.884401990572293e-06, "loss": 0.9888, "step": 3590 }, { "epoch": 0.55, "grad_norm": 2.7439162578949094, "learning_rate": 8.879475534903292e-06, "loss": 0.8091, "step": 3591 }, { "epoch": 0.55, "grad_norm": 3.2872806339100715, "learning_rate": 8.874549354627786e-06, "loss": 0.9352, "step": 3592 }, { "epoch": 0.55, "grad_norm": 2.7685688420362307, "learning_rate": 8.869623450956484e-06, "loss": 0.8611, "step": 3593 }, { "epoch": 0.55, "grad_norm": 2.736941628903649, "learning_rate": 8.86469782510003e-06, "loss": 0.7929, "step": 3594 }, { "epoch": 0.55, "grad_norm": 2.58569671282389, "learning_rate": 8.859772478269013e-06, "loss": 0.8211, "step": 3595 }, { "epoch": 0.55, "grad_norm": 3.025888336572811, "learning_rate": 8.854847411673944e-06, "loss": 0.8808, "step": 3596 }, { "epoch": 0.55, "grad_norm": 2.774503519864151, "learning_rate": 8.849922626525258e-06, "loss": 0.8471, "step": 3597 }, { "epoch": 0.55, "grad_norm": 2.652018012156596, "learning_rate": 8.844998124033339e-06, "loss": 0.8309, "step": 3598 }, { "epoch": 0.55, "grad_norm": 2.7737368141511816, "learning_rate": 8.840073905408488e-06, "loss": 0.797, "step": 3599 }, { "epoch": 0.55, "grad_norm": 2.631941056407336, "learning_rate": 8.83514997186094e-06, "loss": 0.7881, "step": 3600 }, { "epoch": 0.55, "grad_norm": 3.2951018055617, "learning_rate": 8.83022632460086e-06, "loss": 0.808, "step": 3601 }, { "epoch": 0.55, "grad_norm": 2.754968213283891, "learning_rate": 8.825302964838337e-06, "loss": 0.8158, "step": 3602 }, { "epoch": 0.55, "grad_norm": 2.7676551447009947, "learning_rate": 8.820379893783406e-06, "loss": 0.7856, "step": 3603 }, { "epoch": 0.55, "grad_norm": 2.3558550948620947, "learning_rate": 8.815457112646012e-06, "loss": 0.8038, "step": 3604 }, { "epoch": 0.55, "grad_norm": 2.9201439057338137, "learning_rate": 8.810534622636035e-06, "loss": 0.8423, "step": 3605 }, { "epoch": 0.55, "grad_norm": 4.485876965646072, "learning_rate": 8.805612424963293e-06, "loss": 0.9396, "step": 3606 }, { "epoch": 0.55, "grad_norm": 3.05468597846747, "learning_rate": 8.800690520837516e-06, "loss": 0.8926, "step": 3607 }, { "epoch": 0.55, "grad_norm": 2.6860278468379533, "learning_rate": 8.79576891146837e-06, "loss": 0.8396, "step": 3608 }, { "epoch": 0.55, "grad_norm": 2.4171544778269194, "learning_rate": 8.790847598065457e-06, "loss": 0.8286, "step": 3609 }, { "epoch": 0.55, "grad_norm": 3.84210486687714, "learning_rate": 8.785926581838288e-06, "loss": 0.9437, "step": 3610 }, { "epoch": 0.55, "grad_norm": 3.015621670490844, "learning_rate": 8.781005863996318e-06, "loss": 0.9002, "step": 3611 }, { "epoch": 0.55, "grad_norm": 2.6779526247346066, "learning_rate": 8.77608544574892e-06, "loss": 0.8747, "step": 3612 }, { "epoch": 0.55, "grad_norm": 3.650536560154308, "learning_rate": 8.771165328305387e-06, "loss": 0.8957, "step": 3613 }, { "epoch": 0.55, "grad_norm": 2.6917203323536785, "learning_rate": 8.766245512874959e-06, "loss": 0.886, "step": 3614 }, { "epoch": 0.55, "grad_norm": 2.6663991698263834, "learning_rate": 8.76132600066678e-06, "loss": 0.7626, "step": 3615 }, { "epoch": 0.55, "grad_norm": 2.620435279245936, "learning_rate": 8.75640679288993e-06, "loss": 0.7998, "step": 3616 }, { "epoch": 0.55, "grad_norm": 2.506585621530046, "learning_rate": 8.751487890753414e-06, "loss": 0.8461, "step": 3617 }, { "epoch": 0.55, "grad_norm": 2.427248465159573, "learning_rate": 8.746569295466158e-06, "loss": 0.854, "step": 3618 }, { "epoch": 0.55, "grad_norm": 2.886733992698023, "learning_rate": 8.741651008237012e-06, "loss": 0.9957, "step": 3619 }, { "epoch": 0.55, "grad_norm": 2.5762624317434146, "learning_rate": 8.73673303027476e-06, "loss": 0.893, "step": 3620 }, { "epoch": 0.55, "grad_norm": 3.0155415002945727, "learning_rate": 8.731815362788097e-06, "loss": 0.8848, "step": 3621 }, { "epoch": 0.55, "grad_norm": 3.077168323117745, "learning_rate": 8.72689800698565e-06, "loss": 0.8263, "step": 3622 }, { "epoch": 0.55, "grad_norm": 2.7555288356242706, "learning_rate": 8.721980964075971e-06, "loss": 0.7863, "step": 3623 }, { "epoch": 0.55, "grad_norm": 2.5669626707976776, "learning_rate": 8.717064235267523e-06, "loss": 0.8099, "step": 3624 }, { "epoch": 0.55, "grad_norm": 2.9058792524652364, "learning_rate": 8.712147821768708e-06, "loss": 0.8589, "step": 3625 }, { "epoch": 0.56, "grad_norm": 2.5965076505939697, "learning_rate": 8.70723172478784e-06, "loss": 0.8579, "step": 3626 }, { "epoch": 0.56, "grad_norm": 2.8274905558414374, "learning_rate": 8.702315945533156e-06, "loss": 0.8621, "step": 3627 }, { "epoch": 0.56, "grad_norm": 2.630215743420575, "learning_rate": 8.697400485212816e-06, "loss": 0.9039, "step": 3628 }, { "epoch": 0.56, "grad_norm": 2.6932115439415667, "learning_rate": 8.692485345034903e-06, "loss": 0.8897, "step": 3629 }, { "epoch": 0.56, "grad_norm": 2.9440595153618623, "learning_rate": 8.687570526207425e-06, "loss": 0.8714, "step": 3630 }, { "epoch": 0.56, "grad_norm": 2.603288212077659, "learning_rate": 8.682656029938304e-06, "loss": 0.8508, "step": 3631 }, { "epoch": 0.56, "grad_norm": 2.677093356005969, "learning_rate": 8.67774185743538e-06, "loss": 0.8141, "step": 3632 }, { "epoch": 0.56, "grad_norm": 2.3662069721673933, "learning_rate": 8.672828009906427e-06, "loss": 0.8724, "step": 3633 }, { "epoch": 0.56, "grad_norm": 2.53071609090638, "learning_rate": 8.667914488559128e-06, "loss": 0.8556, "step": 3634 }, { "epoch": 0.56, "grad_norm": 2.7326135410952004, "learning_rate": 8.663001294601082e-06, "loss": 0.789, "step": 3635 }, { "epoch": 0.56, "grad_norm": 5.10132310788602, "learning_rate": 8.658088429239826e-06, "loss": 1.0455, "step": 3636 }, { "epoch": 0.56, "grad_norm": 2.87349936884276, "learning_rate": 8.653175893682798e-06, "loss": 0.8204, "step": 3637 }, { "epoch": 0.56, "grad_norm": 2.763286521179671, "learning_rate": 8.648263689137361e-06, "loss": 0.8436, "step": 3638 }, { "epoch": 0.56, "grad_norm": 2.6273730030188003, "learning_rate": 8.643351816810798e-06, "loss": 0.7951, "step": 3639 }, { "epoch": 0.56, "grad_norm": 2.917254214247303, "learning_rate": 8.638440277910308e-06, "loss": 0.8058, "step": 3640 }, { "epoch": 0.56, "grad_norm": 2.3002616303651755, "learning_rate": 8.633529073643015e-06, "loss": 0.7621, "step": 3641 }, { "epoch": 0.56, "grad_norm": 2.413059539401685, "learning_rate": 8.628618205215952e-06, "loss": 0.8709, "step": 3642 }, { "epoch": 0.56, "grad_norm": 2.5414190420762237, "learning_rate": 8.62370767383607e-06, "loss": 0.8097, "step": 3643 }, { "epoch": 0.56, "grad_norm": 2.481571621621111, "learning_rate": 8.618797480710244e-06, "loss": 0.8251, "step": 3644 }, { "epoch": 0.56, "grad_norm": 2.5172550773810687, "learning_rate": 8.613887627045259e-06, "loss": 0.9102, "step": 3645 }, { "epoch": 0.56, "grad_norm": 2.670664180733619, "learning_rate": 8.608978114047818e-06, "loss": 0.8178, "step": 3646 }, { "epoch": 0.56, "grad_norm": 2.606139471469613, "learning_rate": 8.604068942924546e-06, "loss": 0.8516, "step": 3647 }, { "epoch": 0.56, "grad_norm": 2.82664466148121, "learning_rate": 8.599160114881979e-06, "loss": 0.9226, "step": 3648 }, { "epoch": 0.56, "grad_norm": 2.523005606895564, "learning_rate": 8.594251631126566e-06, "loss": 0.8452, "step": 3649 }, { "epoch": 0.56, "grad_norm": 6.5205110535151665, "learning_rate": 8.589343492864677e-06, "loss": 0.9898, "step": 3650 }, { "epoch": 0.56, "grad_norm": 2.9766710161932304, "learning_rate": 8.58443570130259e-06, "loss": 0.7819, "step": 3651 }, { "epoch": 0.56, "grad_norm": 2.6300474432151995, "learning_rate": 8.579528257646512e-06, "loss": 0.8238, "step": 3652 }, { "epoch": 0.56, "grad_norm": 2.6781706303509085, "learning_rate": 8.574621163102548e-06, "loss": 0.8303, "step": 3653 }, { "epoch": 0.56, "grad_norm": 2.8478976294309364, "learning_rate": 8.569714418876726e-06, "loss": 0.8185, "step": 3654 }, { "epoch": 0.56, "grad_norm": 2.675348904783975, "learning_rate": 8.564808026174987e-06, "loss": 0.8694, "step": 3655 }, { "epoch": 0.56, "grad_norm": 2.645107465382493, "learning_rate": 8.559901986203183e-06, "loss": 0.8213, "step": 3656 }, { "epoch": 0.56, "grad_norm": 2.7247460217826753, "learning_rate": 8.554996300167078e-06, "loss": 0.7998, "step": 3657 }, { "epoch": 0.56, "grad_norm": 2.5975898264618977, "learning_rate": 8.55009096927236e-06, "loss": 0.8683, "step": 3658 }, { "epoch": 0.56, "grad_norm": 2.768347542686713, "learning_rate": 8.545185994724619e-06, "loss": 0.8646, "step": 3659 }, { "epoch": 0.56, "grad_norm": 2.524499498032838, "learning_rate": 8.540281377729355e-06, "loss": 0.9274, "step": 3660 }, { "epoch": 0.56, "grad_norm": 2.5152003835677985, "learning_rate": 8.535377119491992e-06, "loss": 0.7848, "step": 3661 }, { "epoch": 0.56, "grad_norm": 11.956498767697864, "learning_rate": 8.53047322121785e-06, "loss": 0.9453, "step": 3662 }, { "epoch": 0.56, "grad_norm": 2.6256844278947464, "learning_rate": 8.525569684112181e-06, "loss": 0.6936, "step": 3663 }, { "epoch": 0.56, "grad_norm": 2.80453341383359, "learning_rate": 8.520666509380132e-06, "loss": 0.9192, "step": 3664 }, { "epoch": 0.56, "grad_norm": 2.6025782439130096, "learning_rate": 8.515763698226764e-06, "loss": 0.9011, "step": 3665 }, { "epoch": 0.56, "grad_norm": 2.6216848423824293, "learning_rate": 8.510861251857053e-06, "loss": 0.7981, "step": 3666 }, { "epoch": 0.56, "grad_norm": 2.526482661731994, "learning_rate": 8.505959171475883e-06, "loss": 0.7283, "step": 3667 }, { "epoch": 0.56, "grad_norm": 2.840409081076734, "learning_rate": 8.501057458288041e-06, "loss": 0.8971, "step": 3668 }, { "epoch": 0.56, "grad_norm": 2.6405701512694044, "learning_rate": 8.496156113498242e-06, "loss": 0.8458, "step": 3669 }, { "epoch": 0.56, "grad_norm": 2.647772783988904, "learning_rate": 8.491255138311093e-06, "loss": 0.8854, "step": 3670 }, { "epoch": 0.56, "grad_norm": 2.5549512183080996, "learning_rate": 8.486354533931114e-06, "loss": 0.7682, "step": 3671 }, { "epoch": 0.56, "grad_norm": 2.539104496539669, "learning_rate": 8.481454301562741e-06, "loss": 0.7779, "step": 3672 }, { "epoch": 0.56, "grad_norm": 3.0747522525924325, "learning_rate": 8.476554442410308e-06, "loss": 0.8221, "step": 3673 }, { "epoch": 0.56, "grad_norm": 2.833827224853693, "learning_rate": 8.47165495767807e-06, "loss": 0.8259, "step": 3674 }, { "epoch": 0.56, "grad_norm": 4.3111799161124615, "learning_rate": 8.46675584857018e-06, "loss": 0.9928, "step": 3675 }, { "epoch": 0.56, "grad_norm": 2.6904811253228873, "learning_rate": 8.461857116290698e-06, "loss": 0.8276, "step": 3676 }, { "epoch": 0.56, "grad_norm": 2.789560243636047, "learning_rate": 8.456958762043602e-06, "loss": 0.7791, "step": 3677 }, { "epoch": 0.56, "grad_norm": 2.7711953502100717, "learning_rate": 8.452060787032762e-06, "loss": 0.9128, "step": 3678 }, { "epoch": 0.56, "grad_norm": 2.7597117529170894, "learning_rate": 8.447163192461967e-06, "loss": 0.7911, "step": 3679 }, { "epoch": 0.56, "grad_norm": 2.758978197325863, "learning_rate": 8.44226597953491e-06, "loss": 0.8669, "step": 3680 }, { "epoch": 0.56, "grad_norm": 2.561596737247688, "learning_rate": 8.437369149455188e-06, "loss": 0.8433, "step": 3681 }, { "epoch": 0.56, "grad_norm": 4.7707246140287785, "learning_rate": 8.432472703426301e-06, "loss": 1.0088, "step": 3682 }, { "epoch": 0.56, "grad_norm": 2.5503950734906424, "learning_rate": 8.427576642651664e-06, "loss": 0.8231, "step": 3683 }, { "epoch": 0.56, "grad_norm": 2.6727623912158047, "learning_rate": 8.422680968334583e-06, "loss": 0.8313, "step": 3684 }, { "epoch": 0.56, "grad_norm": 2.737454750508601, "learning_rate": 8.417785681678286e-06, "loss": 0.8674, "step": 3685 }, { "epoch": 0.56, "grad_norm": 2.6719477754679586, "learning_rate": 8.412890783885894e-06, "loss": 0.9723, "step": 3686 }, { "epoch": 0.56, "grad_norm": 2.642320424254053, "learning_rate": 8.40799627616043e-06, "loss": 0.8246, "step": 3687 }, { "epoch": 0.56, "grad_norm": 2.709922300257362, "learning_rate": 8.40310215970484e-06, "loss": 0.8561, "step": 3688 }, { "epoch": 0.56, "grad_norm": 2.666770069476004, "learning_rate": 8.398208435721948e-06, "loss": 0.7958, "step": 3689 }, { "epoch": 0.56, "grad_norm": 2.7526148496402754, "learning_rate": 8.393315105414496e-06, "loss": 0.8502, "step": 3690 }, { "epoch": 0.56, "grad_norm": 2.6659138491305767, "learning_rate": 8.388422169985133e-06, "loss": 0.7236, "step": 3691 }, { "epoch": 0.57, "grad_norm": 2.5439661145398333, "learning_rate": 8.383529630636403e-06, "loss": 0.7353, "step": 3692 }, { "epoch": 0.57, "grad_norm": 2.724577989752663, "learning_rate": 8.378637488570753e-06, "loss": 0.8559, "step": 3693 }, { "epoch": 0.57, "grad_norm": 2.5940339598271605, "learning_rate": 8.373745744990536e-06, "loss": 0.9183, "step": 3694 }, { "epoch": 0.57, "grad_norm": 2.4445109767260633, "learning_rate": 8.368854401098001e-06, "loss": 0.8698, "step": 3695 }, { "epoch": 0.57, "grad_norm": 2.5851633527893316, "learning_rate": 8.363963458095312e-06, "loss": 0.9235, "step": 3696 }, { "epoch": 0.57, "grad_norm": 2.58465589391059, "learning_rate": 8.35907291718452e-06, "loss": 0.8486, "step": 3697 }, { "epoch": 0.57, "grad_norm": 2.4882994084186425, "learning_rate": 8.35418277956758e-06, "loss": 0.7139, "step": 3698 }, { "epoch": 0.57, "grad_norm": 2.8000076559062457, "learning_rate": 8.34929304644636e-06, "loss": 0.8835, "step": 3699 }, { "epoch": 0.57, "grad_norm": 2.770086107774786, "learning_rate": 8.34440371902261e-06, "loss": 0.8634, "step": 3700 }, { "epoch": 0.57, "grad_norm": 2.4079058389435675, "learning_rate": 8.33951479849799e-06, "loss": 0.7542, "step": 3701 }, { "epoch": 0.57, "grad_norm": 2.848969415151501, "learning_rate": 8.334626286074068e-06, "loss": 0.8225, "step": 3702 }, { "epoch": 0.57, "grad_norm": 2.7592930972688183, "learning_rate": 8.329738182952294e-06, "loss": 0.8484, "step": 3703 }, { "epoch": 0.57, "grad_norm": 2.6074480161638633, "learning_rate": 8.32485049033403e-06, "loss": 0.7946, "step": 3704 }, { "epoch": 0.57, "grad_norm": 2.7716119180727934, "learning_rate": 8.319963209420538e-06, "loss": 0.944, "step": 3705 }, { "epoch": 0.57, "grad_norm": 2.8029282681731065, "learning_rate": 8.315076341412966e-06, "loss": 0.7907, "step": 3706 }, { "epoch": 0.57, "grad_norm": 2.6892629916864452, "learning_rate": 8.310189887512376e-06, "loss": 0.9146, "step": 3707 }, { "epoch": 0.57, "grad_norm": 2.6734084490871024, "learning_rate": 8.305303848919721e-06, "loss": 0.7844, "step": 3708 }, { "epoch": 0.57, "grad_norm": 2.539063798010986, "learning_rate": 8.300418226835845e-06, "loss": 0.8068, "step": 3709 }, { "epoch": 0.57, "grad_norm": 3.9846429225063944, "learning_rate": 8.295533022461509e-06, "loss": 0.9278, "step": 3710 }, { "epoch": 0.57, "grad_norm": 2.6602592369524305, "learning_rate": 8.29064823699735e-06, "loss": 0.8223, "step": 3711 }, { "epoch": 0.57, "grad_norm": 2.416254247890502, "learning_rate": 8.285763871643913e-06, "loss": 0.9225, "step": 3712 }, { "epoch": 0.57, "grad_norm": 2.9016898319075652, "learning_rate": 8.28087992760164e-06, "loss": 0.8301, "step": 3713 }, { "epoch": 0.57, "grad_norm": 2.7287937427591302, "learning_rate": 8.275996406070862e-06, "loss": 0.8018, "step": 3714 }, { "epoch": 0.57, "grad_norm": 2.5050043115033396, "learning_rate": 8.27111330825182e-06, "loss": 0.8171, "step": 3715 }, { "epoch": 0.57, "grad_norm": 2.6532678696623444, "learning_rate": 8.266230635344639e-06, "loss": 0.8022, "step": 3716 }, { "epoch": 0.57, "grad_norm": 2.561582659958214, "learning_rate": 8.261348388549339e-06, "loss": 0.8913, "step": 3717 }, { "epoch": 0.57, "grad_norm": 3.179169065907958, "learning_rate": 8.256466569065848e-06, "loss": 0.8957, "step": 3718 }, { "epoch": 0.57, "grad_norm": 2.7464665614377495, "learning_rate": 8.251585178093973e-06, "loss": 0.869, "step": 3719 }, { "epoch": 0.57, "grad_norm": 2.5740469266074557, "learning_rate": 8.24670421683342e-06, "loss": 0.7636, "step": 3720 }, { "epoch": 0.57, "grad_norm": 2.625686248280815, "learning_rate": 8.241823686483803e-06, "loss": 0.9046, "step": 3721 }, { "epoch": 0.57, "grad_norm": 2.662924210522219, "learning_rate": 8.236943588244614e-06, "loss": 0.7854, "step": 3722 }, { "epoch": 0.57, "grad_norm": 3.002836458193394, "learning_rate": 8.232063923315241e-06, "loss": 0.7868, "step": 3723 }, { "epoch": 0.57, "grad_norm": 3.045339883343718, "learning_rate": 8.227184692894975e-06, "loss": 0.7397, "step": 3724 }, { "epoch": 0.57, "grad_norm": 2.628061506312594, "learning_rate": 8.222305898182988e-06, "loss": 0.9184, "step": 3725 }, { "epoch": 0.57, "grad_norm": 2.741478080492712, "learning_rate": 8.217427540378356e-06, "loss": 0.7048, "step": 3726 }, { "epoch": 0.57, "grad_norm": 2.432295343245831, "learning_rate": 8.212549620680042e-06, "loss": 0.8698, "step": 3727 }, { "epoch": 0.57, "grad_norm": 2.6309359477330854, "learning_rate": 8.207672140286897e-06, "loss": 0.801, "step": 3728 }, { "epoch": 0.57, "grad_norm": 2.5958707826515677, "learning_rate": 8.202795100397675e-06, "loss": 0.8624, "step": 3729 }, { "epoch": 0.57, "grad_norm": 2.463907049171007, "learning_rate": 8.197918502211012e-06, "loss": 0.7411, "step": 3730 }, { "epoch": 0.57, "grad_norm": 2.7184611770503855, "learning_rate": 8.193042346925436e-06, "loss": 0.8144, "step": 3731 }, { "epoch": 0.57, "grad_norm": 2.597451160371377, "learning_rate": 8.188166635739378e-06, "loss": 0.8072, "step": 3732 }, { "epoch": 0.57, "grad_norm": 2.4920600266531894, "learning_rate": 8.183291369851147e-06, "loss": 0.7994, "step": 3733 }, { "epoch": 0.57, "grad_norm": 2.836702439590227, "learning_rate": 8.178416550458942e-06, "loss": 0.8462, "step": 3734 }, { "epoch": 0.57, "grad_norm": 2.4212101587552, "learning_rate": 8.173542178760863e-06, "loss": 0.853, "step": 3735 }, { "epoch": 0.57, "grad_norm": 2.477224609844448, "learning_rate": 8.168668255954887e-06, "loss": 0.8548, "step": 3736 }, { "epoch": 0.57, "grad_norm": 2.78458703917749, "learning_rate": 8.163794783238896e-06, "loss": 0.8783, "step": 3737 }, { "epoch": 0.57, "grad_norm": 4.066389736990901, "learning_rate": 8.158921761810652e-06, "loss": 0.9896, "step": 3738 }, { "epoch": 0.57, "grad_norm": 2.6268166088866214, "learning_rate": 8.154049192867799e-06, "loss": 0.8734, "step": 3739 }, { "epoch": 0.57, "grad_norm": 2.6637608901708685, "learning_rate": 8.149177077607885e-06, "loss": 0.9008, "step": 3740 }, { "epoch": 0.57, "grad_norm": 2.8074823775308575, "learning_rate": 8.144305417228338e-06, "loss": 0.9087, "step": 3741 }, { "epoch": 0.57, "grad_norm": 2.5991639822857464, "learning_rate": 8.13943421292647e-06, "loss": 0.8284, "step": 3742 }, { "epoch": 0.57, "grad_norm": 2.3865845077404044, "learning_rate": 8.134563465899494e-06, "loss": 0.5926, "step": 3743 }, { "epoch": 0.57, "grad_norm": 3.0143104439623296, "learning_rate": 8.129693177344501e-06, "loss": 0.7634, "step": 3744 }, { "epoch": 0.57, "grad_norm": 3.311530155343317, "learning_rate": 8.124823348458469e-06, "loss": 0.7781, "step": 3745 }, { "epoch": 0.57, "grad_norm": 2.8112785294356226, "learning_rate": 8.119953980438268e-06, "loss": 0.8852, "step": 3746 }, { "epoch": 0.57, "grad_norm": 4.7930743638010975, "learning_rate": 8.115085074480646e-06, "loss": 0.9101, "step": 3747 }, { "epoch": 0.57, "grad_norm": 2.4819200007879494, "learning_rate": 8.110216631782252e-06, "loss": 0.7507, "step": 3748 }, { "epoch": 0.57, "grad_norm": 2.5773649983789944, "learning_rate": 8.10534865353961e-06, "loss": 0.9403, "step": 3749 }, { "epoch": 0.57, "grad_norm": 2.9531625641283825, "learning_rate": 8.10048114094913e-06, "loss": 0.8998, "step": 3750 }, { "epoch": 0.57, "grad_norm": 2.3980166625417954, "learning_rate": 8.095614095207114e-06, "loss": 0.8242, "step": 3751 }, { "epoch": 0.57, "grad_norm": 2.6056645544160584, "learning_rate": 8.09074751750974e-06, "loss": 0.798, "step": 3752 }, { "epoch": 0.57, "grad_norm": 2.463826485778903, "learning_rate": 8.085881409053077e-06, "loss": 0.7636, "step": 3753 }, { "epoch": 0.57, "grad_norm": 2.9734517602914123, "learning_rate": 8.081015771033084e-06, "loss": 0.6716, "step": 3754 }, { "epoch": 0.57, "grad_norm": 2.6689993987748344, "learning_rate": 8.076150604645596e-06, "loss": 0.7986, "step": 3755 }, { "epoch": 0.57, "grad_norm": 3.8901860718142007, "learning_rate": 8.07128591108633e-06, "loss": 0.9522, "step": 3756 }, { "epoch": 0.58, "grad_norm": 2.691387601662654, "learning_rate": 8.066421691550895e-06, "loss": 0.8825, "step": 3757 }, { "epoch": 0.58, "grad_norm": 2.521901884415322, "learning_rate": 8.061557947234776e-06, "loss": 0.8862, "step": 3758 }, { "epoch": 0.58, "grad_norm": 2.492459607271112, "learning_rate": 8.056694679333352e-06, "loss": 0.7939, "step": 3759 }, { "epoch": 0.58, "grad_norm": 2.661612562114557, "learning_rate": 8.051831889041874e-06, "loss": 0.9144, "step": 3760 }, { "epoch": 0.58, "grad_norm": 2.706362129998038, "learning_rate": 8.046969577555476e-06, "loss": 0.8429, "step": 3761 }, { "epoch": 0.58, "grad_norm": 2.472977941116828, "learning_rate": 8.042107746069186e-06, "loss": 0.8602, "step": 3762 }, { "epoch": 0.58, "grad_norm": 2.754451211879156, "learning_rate": 8.037246395777899e-06, "loss": 0.9238, "step": 3763 }, { "epoch": 0.58, "grad_norm": 2.481373688892135, "learning_rate": 8.032385527876395e-06, "loss": 0.8886, "step": 3764 }, { "epoch": 0.58, "grad_norm": 2.5611895981930513, "learning_rate": 8.02752514355935e-06, "loss": 0.8657, "step": 3765 }, { "epoch": 0.58, "grad_norm": 2.8296881650687014, "learning_rate": 8.022665244021305e-06, "loss": 0.8609, "step": 3766 }, { "epoch": 0.58, "grad_norm": 2.661495351257953, "learning_rate": 8.017805830456682e-06, "loss": 0.9261, "step": 3767 }, { "epoch": 0.58, "grad_norm": 2.661809405239875, "learning_rate": 8.012946904059798e-06, "loss": 0.7899, "step": 3768 }, { "epoch": 0.58, "grad_norm": 2.558394657590361, "learning_rate": 8.00808846602483e-06, "loss": 0.8469, "step": 3769 }, { "epoch": 0.58, "grad_norm": 2.8327158347851484, "learning_rate": 8.003230517545857e-06, "loss": 0.7845, "step": 3770 }, { "epoch": 0.58, "grad_norm": 2.4525659365371824, "learning_rate": 7.998373059816822e-06, "loss": 0.7724, "step": 3771 }, { "epoch": 0.58, "grad_norm": 2.603495304223118, "learning_rate": 7.993516094031548e-06, "loss": 0.9337, "step": 3772 }, { "epoch": 0.58, "grad_norm": 2.6672418683358936, "learning_rate": 7.988659621383746e-06, "loss": 0.842, "step": 3773 }, { "epoch": 0.58, "grad_norm": 2.877866056470712, "learning_rate": 7.983803643067e-06, "loss": 0.8995, "step": 3774 }, { "epoch": 0.58, "grad_norm": 2.6378115325206313, "learning_rate": 7.978948160274767e-06, "loss": 0.8989, "step": 3775 }, { "epoch": 0.58, "grad_norm": 2.9815579099601783, "learning_rate": 7.974093174200397e-06, "loss": 0.8495, "step": 3776 }, { "epoch": 0.58, "grad_norm": 3.132213870518248, "learning_rate": 7.96923868603711e-06, "loss": 0.8843, "step": 3777 }, { "epoch": 0.58, "grad_norm": 2.412788545789461, "learning_rate": 7.964384696977992e-06, "loss": 0.7961, "step": 3778 }, { "epoch": 0.58, "grad_norm": 2.370076785003066, "learning_rate": 7.959531208216028e-06, "loss": 0.7651, "step": 3779 }, { "epoch": 0.58, "grad_norm": 2.5746154436580557, "learning_rate": 7.954678220944064e-06, "loss": 0.878, "step": 3780 }, { "epoch": 0.58, "grad_norm": 2.6759180343235527, "learning_rate": 7.949825736354833e-06, "loss": 0.8155, "step": 3781 }, { "epoch": 0.58, "grad_norm": 2.6891425243501357, "learning_rate": 7.944973755640937e-06, "loss": 0.9401, "step": 3782 }, { "epoch": 0.58, "grad_norm": 2.516111779906683, "learning_rate": 7.940122279994853e-06, "loss": 0.9423, "step": 3783 }, { "epoch": 0.58, "grad_norm": 2.7621339373286307, "learning_rate": 7.935271310608946e-06, "loss": 0.9382, "step": 3784 }, { "epoch": 0.58, "grad_norm": 2.619415577360085, "learning_rate": 7.930420848675442e-06, "loss": 0.8197, "step": 3785 }, { "epoch": 0.58, "grad_norm": 4.245032498999451, "learning_rate": 7.925570895386443e-06, "loss": 0.9284, "step": 3786 }, { "epoch": 0.58, "grad_norm": 2.700494938164556, "learning_rate": 7.920721451933946e-06, "loss": 0.952, "step": 3787 }, { "epoch": 0.58, "grad_norm": 2.534211144265974, "learning_rate": 7.915872519509798e-06, "loss": 0.9005, "step": 3788 }, { "epoch": 0.58, "grad_norm": 2.960644175712867, "learning_rate": 7.911024099305733e-06, "loss": 0.8329, "step": 3789 }, { "epoch": 0.58, "grad_norm": 2.742678404898385, "learning_rate": 7.90617619251336e-06, "loss": 0.8715, "step": 3790 }, { "epoch": 0.58, "grad_norm": 2.627606307215524, "learning_rate": 7.901328800324149e-06, "loss": 0.7791, "step": 3791 }, { "epoch": 0.58, "grad_norm": 2.4158599573343573, "learning_rate": 7.896481923929464e-06, "loss": 0.7315, "step": 3792 }, { "epoch": 0.58, "grad_norm": 2.4810470882863247, "learning_rate": 7.891635564520527e-06, "loss": 0.7382, "step": 3793 }, { "epoch": 0.58, "grad_norm": 2.510165859368989, "learning_rate": 7.886789723288436e-06, "loss": 0.7769, "step": 3794 }, { "epoch": 0.58, "grad_norm": 2.5818045931789033, "learning_rate": 7.881944401424164e-06, "loss": 0.7249, "step": 3795 }, { "epoch": 0.58, "grad_norm": 2.7965746120569066, "learning_rate": 7.877099600118556e-06, "loss": 0.8596, "step": 3796 }, { "epoch": 0.58, "grad_norm": 2.868160776338824, "learning_rate": 7.872255320562324e-06, "loss": 0.8343, "step": 3797 }, { "epoch": 0.58, "grad_norm": 2.8908538497696297, "learning_rate": 7.867411563946061e-06, "loss": 0.9137, "step": 3798 }, { "epoch": 0.58, "grad_norm": 2.675812256480429, "learning_rate": 7.862568331460224e-06, "loss": 0.8901, "step": 3799 }, { "epoch": 0.58, "grad_norm": 2.7616648182460937, "learning_rate": 7.857725624295147e-06, "loss": 0.8093, "step": 3800 }, { "epoch": 0.58, "grad_norm": 2.7560563302006402, "learning_rate": 7.85288344364103e-06, "loss": 0.7587, "step": 3801 }, { "epoch": 0.58, "grad_norm": 2.902987278257522, "learning_rate": 7.848041790687938e-06, "loss": 0.809, "step": 3802 }, { "epoch": 0.58, "grad_norm": 3.9658013555750475, "learning_rate": 7.843200666625827e-06, "loss": 0.9063, "step": 3803 }, { "epoch": 0.58, "grad_norm": 2.7647583871656245, "learning_rate": 7.838360072644502e-06, "loss": 0.7302, "step": 3804 }, { "epoch": 0.58, "grad_norm": 2.6531480332408846, "learning_rate": 7.833520009933642e-06, "loss": 0.8479, "step": 3805 }, { "epoch": 0.58, "grad_norm": 2.4018411002880904, "learning_rate": 7.828680479682804e-06, "loss": 0.7875, "step": 3806 }, { "epoch": 0.58, "grad_norm": 2.6037494271984802, "learning_rate": 7.823841483081408e-06, "loss": 0.8453, "step": 3807 }, { "epoch": 0.58, "grad_norm": 2.5441498531562448, "learning_rate": 7.81900302131874e-06, "loss": 0.8459, "step": 3808 }, { "epoch": 0.58, "grad_norm": 2.4488141970955777, "learning_rate": 7.814165095583963e-06, "loss": 0.7218, "step": 3809 }, { "epoch": 0.58, "grad_norm": 2.697496931767143, "learning_rate": 7.809327707066098e-06, "loss": 0.875, "step": 3810 }, { "epoch": 0.58, "grad_norm": 2.7787096557710593, "learning_rate": 7.80449085695405e-06, "loss": 0.9643, "step": 3811 }, { "epoch": 0.58, "grad_norm": 2.997625552294042, "learning_rate": 7.799654546436572e-06, "loss": 0.9041, "step": 3812 }, { "epoch": 0.58, "grad_norm": 2.5589065401606454, "learning_rate": 7.79481877670229e-06, "loss": 0.7814, "step": 3813 }, { "epoch": 0.58, "grad_norm": 2.7670004774038652, "learning_rate": 7.78998354893971e-06, "loss": 0.7938, "step": 3814 }, { "epoch": 0.58, "grad_norm": 2.5168899500141206, "learning_rate": 7.785148864337192e-06, "loss": 0.8931, "step": 3815 }, { "epoch": 0.58, "grad_norm": 2.812391141474085, "learning_rate": 7.780314724082963e-06, "loss": 0.9063, "step": 3816 }, { "epoch": 0.58, "grad_norm": 2.719624742077618, "learning_rate": 7.775481129365126e-06, "loss": 0.7571, "step": 3817 }, { "epoch": 0.58, "grad_norm": 2.641408766865852, "learning_rate": 7.770648081371636e-06, "loss": 0.8086, "step": 3818 }, { "epoch": 0.58, "grad_norm": 2.5413481785443994, "learning_rate": 7.76581558129032e-06, "loss": 0.9068, "step": 3819 }, { "epoch": 0.58, "grad_norm": 2.567326515183025, "learning_rate": 7.760983630308877e-06, "loss": 0.8058, "step": 3820 }, { "epoch": 0.58, "grad_norm": 2.895640457109586, "learning_rate": 7.756152229614858e-06, "loss": 0.7857, "step": 3821 }, { "epoch": 0.59, "grad_norm": 2.705430270171675, "learning_rate": 7.751321380395696e-06, "loss": 0.7997, "step": 3822 }, { "epoch": 0.59, "grad_norm": 2.3761036254213566, "learning_rate": 7.74649108383867e-06, "loss": 0.7441, "step": 3823 }, { "epoch": 0.59, "grad_norm": 2.7108377611394814, "learning_rate": 7.741661341130931e-06, "loss": 0.8042, "step": 3824 }, { "epoch": 0.59, "grad_norm": 2.76401109437764, "learning_rate": 7.736832153459501e-06, "loss": 0.7993, "step": 3825 }, { "epoch": 0.59, "grad_norm": 2.6007382840432243, "learning_rate": 7.732003522011255e-06, "loss": 0.9539, "step": 3826 }, { "epoch": 0.59, "grad_norm": 2.8750454249231185, "learning_rate": 7.727175447972928e-06, "loss": 0.7493, "step": 3827 }, { "epoch": 0.59, "grad_norm": 2.7782634958747026, "learning_rate": 7.72234793253114e-06, "loss": 0.8482, "step": 3828 }, { "epoch": 0.59, "grad_norm": 2.4242192072212174, "learning_rate": 7.717520976872347e-06, "loss": 0.7813, "step": 3829 }, { "epoch": 0.59, "grad_norm": 2.8572436094490086, "learning_rate": 7.712694582182885e-06, "loss": 0.9205, "step": 3830 }, { "epoch": 0.59, "grad_norm": 2.8229708482287035, "learning_rate": 7.707868749648945e-06, "loss": 0.8363, "step": 3831 }, { "epoch": 0.59, "grad_norm": 2.5074469640257666, "learning_rate": 7.703043480456576e-06, "loss": 0.807, "step": 3832 }, { "epoch": 0.59, "grad_norm": 2.6569233219377844, "learning_rate": 7.698218775791704e-06, "loss": 0.7093, "step": 3833 }, { "epoch": 0.59, "grad_norm": 2.63041037066242, "learning_rate": 7.693394636840102e-06, "loss": 0.9002, "step": 3834 }, { "epoch": 0.59, "grad_norm": 4.1206808271085595, "learning_rate": 7.688571064787404e-06, "loss": 0.9588, "step": 3835 }, { "epoch": 0.59, "grad_norm": 2.6908652608398955, "learning_rate": 7.683748060819116e-06, "loss": 0.853, "step": 3836 }, { "epoch": 0.59, "grad_norm": 2.64015780770857, "learning_rate": 7.67892562612059e-06, "loss": 0.7764, "step": 3837 }, { "epoch": 0.59, "grad_norm": 3.161301991487249, "learning_rate": 7.674103761877045e-06, "loss": 0.8251, "step": 3838 }, { "epoch": 0.59, "grad_norm": 2.627704700289266, "learning_rate": 7.669282469273566e-06, "loss": 0.8344, "step": 3839 }, { "epoch": 0.59, "grad_norm": 2.7454896666097164, "learning_rate": 7.664461749495092e-06, "loss": 0.8359, "step": 3840 }, { "epoch": 0.59, "grad_norm": 2.714463524406809, "learning_rate": 7.659641603726413e-06, "loss": 0.748, "step": 3841 }, { "epoch": 0.59, "grad_norm": 2.788609811735884, "learning_rate": 7.654822033152192e-06, "loss": 0.8835, "step": 3842 }, { "epoch": 0.59, "grad_norm": 2.72491491081617, "learning_rate": 7.650003038956939e-06, "loss": 0.9077, "step": 3843 }, { "epoch": 0.59, "grad_norm": 2.6085472328025125, "learning_rate": 7.645184622325034e-06, "loss": 0.872, "step": 3844 }, { "epoch": 0.59, "grad_norm": 2.813389893202622, "learning_rate": 7.640366784440707e-06, "loss": 0.8672, "step": 3845 }, { "epoch": 0.59, "grad_norm": 2.326141636602756, "learning_rate": 7.635549526488042e-06, "loss": 0.7961, "step": 3846 }, { "epoch": 0.59, "grad_norm": 2.6334060331516205, "learning_rate": 7.630732849650993e-06, "loss": 0.8555, "step": 3847 }, { "epoch": 0.59, "grad_norm": 2.5144279864015444, "learning_rate": 7.625916755113361e-06, "loss": 0.8252, "step": 3848 }, { "epoch": 0.59, "grad_norm": 2.674159071621899, "learning_rate": 7.621101244058803e-06, "loss": 0.8819, "step": 3849 }, { "epoch": 0.59, "grad_norm": 2.7674109736473715, "learning_rate": 7.616286317670845e-06, "loss": 0.7824, "step": 3850 }, { "epoch": 0.59, "grad_norm": 2.7562266107062277, "learning_rate": 7.611471977132855e-06, "loss": 0.888, "step": 3851 }, { "epoch": 0.59, "grad_norm": 2.5461987223959257, "learning_rate": 7.606658223628063e-06, "loss": 0.8751, "step": 3852 }, { "epoch": 0.59, "grad_norm": 2.4483598091432293, "learning_rate": 7.601845058339557e-06, "loss": 0.8321, "step": 3853 }, { "epoch": 0.59, "grad_norm": 2.4859420768204363, "learning_rate": 7.5970324824502736e-06, "loss": 0.8101, "step": 3854 }, { "epoch": 0.59, "grad_norm": 3.020109170851338, "learning_rate": 7.5922204971430165e-06, "loss": 0.8826, "step": 3855 }, { "epoch": 0.59, "grad_norm": 2.692579456461489, "learning_rate": 7.587409103600433e-06, "loss": 0.7842, "step": 3856 }, { "epoch": 0.59, "grad_norm": 2.8599508800622364, "learning_rate": 7.582598303005026e-06, "loss": 0.8273, "step": 3857 }, { "epoch": 0.59, "grad_norm": 2.3605072679986843, "learning_rate": 7.577788096539158e-06, "loss": 0.8066, "step": 3858 }, { "epoch": 0.59, "grad_norm": 2.609277078943832, "learning_rate": 7.572978485385044e-06, "loss": 0.8033, "step": 3859 }, { "epoch": 0.59, "grad_norm": 4.195124199704877, "learning_rate": 7.5681694707247445e-06, "loss": 0.9971, "step": 3860 }, { "epoch": 0.59, "grad_norm": 2.482502952468572, "learning_rate": 7.563361053740191e-06, "loss": 0.7986, "step": 3861 }, { "epoch": 0.59, "grad_norm": 2.574590700574185, "learning_rate": 7.558553235613151e-06, "loss": 0.825, "step": 3862 }, { "epoch": 0.59, "grad_norm": 2.5775304906123413, "learning_rate": 7.553746017525253e-06, "loss": 0.8978, "step": 3863 }, { "epoch": 0.59, "grad_norm": 2.9492898868537, "learning_rate": 7.548939400657977e-06, "loss": 0.8328, "step": 3864 }, { "epoch": 0.59, "grad_norm": 2.71613382329485, "learning_rate": 7.544133386192649e-06, "loss": 0.7306, "step": 3865 }, { "epoch": 0.59, "grad_norm": 2.7268270443678637, "learning_rate": 7.539327975310463e-06, "loss": 0.8832, "step": 3866 }, { "epoch": 0.59, "grad_norm": 2.5607710626551627, "learning_rate": 7.534523169192447e-06, "loss": 0.7932, "step": 3867 }, { "epoch": 0.59, "grad_norm": 2.699562397322179, "learning_rate": 7.529718969019488e-06, "loss": 0.7144, "step": 3868 }, { "epoch": 0.59, "grad_norm": 2.570287154377048, "learning_rate": 7.524915375972327e-06, "loss": 0.9042, "step": 3869 }, { "epoch": 0.59, "grad_norm": 2.7691816343881577, "learning_rate": 7.520112391231549e-06, "loss": 0.9343, "step": 3870 }, { "epoch": 0.59, "grad_norm": 2.6067248356622845, "learning_rate": 7.515310015977591e-06, "loss": 0.7428, "step": 3871 }, { "epoch": 0.59, "grad_norm": 2.6809416644963053, "learning_rate": 7.510508251390749e-06, "loss": 0.7999, "step": 3872 }, { "epoch": 0.59, "grad_norm": 2.6413952753527137, "learning_rate": 7.505707098651157e-06, "loss": 0.7502, "step": 3873 }, { "epoch": 0.59, "grad_norm": 2.3759380993681276, "learning_rate": 7.500906558938806e-06, "loss": 0.7932, "step": 3874 }, { "epoch": 0.59, "grad_norm": 2.605686384220005, "learning_rate": 7.496106633433535e-06, "loss": 0.9078, "step": 3875 }, { "epoch": 0.59, "grad_norm": 2.4652523513354687, "learning_rate": 7.4913073233150236e-06, "loss": 0.8745, "step": 3876 }, { "epoch": 0.59, "grad_norm": 2.7138063748947827, "learning_rate": 7.486508629762818e-06, "loss": 0.848, "step": 3877 }, { "epoch": 0.59, "grad_norm": 2.508036618474075, "learning_rate": 7.481710553956296e-06, "loss": 0.7674, "step": 3878 }, { "epoch": 0.59, "grad_norm": 2.770740350268557, "learning_rate": 7.476913097074692e-06, "loss": 0.8912, "step": 3879 }, { "epoch": 0.59, "grad_norm": 2.6922695290496668, "learning_rate": 7.472116260297087e-06, "loss": 0.8769, "step": 3880 }, { "epoch": 0.59, "grad_norm": 2.57309161479025, "learning_rate": 7.467320044802409e-06, "loss": 0.796, "step": 3881 }, { "epoch": 0.59, "grad_norm": 2.512958056132519, "learning_rate": 7.462524451769426e-06, "loss": 0.9029, "step": 3882 }, { "epoch": 0.59, "grad_norm": 2.6277879501170536, "learning_rate": 7.457729482376772e-06, "loss": 0.7873, "step": 3883 }, { "epoch": 0.59, "grad_norm": 2.765976568443896, "learning_rate": 7.4529351378029094e-06, "loss": 0.9712, "step": 3884 }, { "epoch": 0.59, "grad_norm": 2.4778448264754562, "learning_rate": 7.4481414192261556e-06, "loss": 0.7683, "step": 3885 }, { "epoch": 0.59, "grad_norm": 2.607179107319063, "learning_rate": 7.443348327824673e-06, "loss": 0.8287, "step": 3886 }, { "epoch": 0.59, "grad_norm": 2.707889718434232, "learning_rate": 7.438555864776462e-06, "loss": 0.808, "step": 3887 }, { "epoch": 0.6, "grad_norm": 3.9849966740053584, "learning_rate": 7.433764031259385e-06, "loss": 0.9122, "step": 3888 }, { "epoch": 0.6, "grad_norm": 3.112460889317166, "learning_rate": 7.428972828451138e-06, "loss": 0.7092, "step": 3889 }, { "epoch": 0.6, "grad_norm": 2.7988184084042227, "learning_rate": 7.424182257529258e-06, "loss": 0.8475, "step": 3890 }, { "epoch": 0.6, "grad_norm": 2.7428963787567104, "learning_rate": 7.419392319671142e-06, "loss": 0.7922, "step": 3891 }, { "epoch": 0.6, "grad_norm": 4.100613249299371, "learning_rate": 7.414603016054016e-06, "loss": 0.968, "step": 3892 }, { "epoch": 0.6, "grad_norm": 2.6502811402975506, "learning_rate": 7.4098143478549555e-06, "loss": 0.9379, "step": 3893 }, { "epoch": 0.6, "grad_norm": 2.6669734031481216, "learning_rate": 7.405026316250887e-06, "loss": 0.7728, "step": 3894 }, { "epoch": 0.6, "grad_norm": 2.606265333083008, "learning_rate": 7.40023892241857e-06, "loss": 0.7656, "step": 3895 }, { "epoch": 0.6, "grad_norm": 2.4323996483300623, "learning_rate": 7.395452167534615e-06, "loss": 0.8158, "step": 3896 }, { "epoch": 0.6, "grad_norm": 2.649985215785405, "learning_rate": 7.39066605277547e-06, "loss": 0.8632, "step": 3897 }, { "epoch": 0.6, "grad_norm": 2.6220263882415966, "learning_rate": 7.385880579317424e-06, "loss": 0.7514, "step": 3898 }, { "epoch": 0.6, "grad_norm": 2.9149752455005213, "learning_rate": 7.381095748336618e-06, "loss": 0.7993, "step": 3899 }, { "epoch": 0.6, "grad_norm": 2.6720254849645264, "learning_rate": 7.3763115610090284e-06, "loss": 0.9737, "step": 3900 }, { "epoch": 0.6, "grad_norm": 2.431364392785707, "learning_rate": 7.371528018510472e-06, "loss": 0.84, "step": 3901 }, { "epoch": 0.6, "grad_norm": 2.6650297510276664, "learning_rate": 7.3667451220166104e-06, "loss": 0.791, "step": 3902 }, { "epoch": 0.6, "grad_norm": 2.452822733776375, "learning_rate": 7.361962872702947e-06, "loss": 0.7925, "step": 3903 }, { "epoch": 0.6, "grad_norm": 2.377872769290526, "learning_rate": 7.357181271744818e-06, "loss": 0.767, "step": 3904 }, { "epoch": 0.6, "grad_norm": 2.7052139163110382, "learning_rate": 7.352400320317417e-06, "loss": 0.8582, "step": 3905 }, { "epoch": 0.6, "grad_norm": 2.551578609697552, "learning_rate": 7.347620019595758e-06, "loss": 0.8198, "step": 3906 }, { "epoch": 0.6, "grad_norm": 2.491058196271869, "learning_rate": 7.342840370754714e-06, "loss": 0.7494, "step": 3907 }, { "epoch": 0.6, "grad_norm": 2.3744720682912193, "learning_rate": 7.338061374968984e-06, "loss": 0.7923, "step": 3908 }, { "epoch": 0.6, "grad_norm": 2.498751007169318, "learning_rate": 7.333283033413106e-06, "loss": 0.863, "step": 3909 }, { "epoch": 0.6, "grad_norm": 2.6822308660893808, "learning_rate": 7.328505347261471e-06, "loss": 0.8063, "step": 3910 }, { "epoch": 0.6, "grad_norm": 2.701553992183195, "learning_rate": 7.323728317688296e-06, "loss": 0.7296, "step": 3911 }, { "epoch": 0.6, "grad_norm": 2.6067250965264033, "learning_rate": 7.318951945867643e-06, "loss": 0.8595, "step": 3912 }, { "epoch": 0.6, "grad_norm": 2.646105279440585, "learning_rate": 7.314176232973409e-06, "loss": 0.8204, "step": 3913 }, { "epoch": 0.6, "grad_norm": 2.472573274215426, "learning_rate": 7.30940118017933e-06, "loss": 0.7515, "step": 3914 }, { "epoch": 0.6, "grad_norm": 2.889346213895278, "learning_rate": 7.304626788658976e-06, "loss": 0.8521, "step": 3915 }, { "epoch": 0.6, "grad_norm": 2.8143134429258736, "learning_rate": 7.299853059585768e-06, "loss": 0.8683, "step": 3916 }, { "epoch": 0.6, "grad_norm": 3.359919074066636, "learning_rate": 7.295079994132947e-06, "loss": 0.9095, "step": 3917 }, { "epoch": 0.6, "grad_norm": 2.3986782808370823, "learning_rate": 7.290307593473603e-06, "loss": 0.7398, "step": 3918 }, { "epoch": 0.6, "grad_norm": 2.7038658777691236, "learning_rate": 7.2855358587806554e-06, "loss": 0.8169, "step": 3919 }, { "epoch": 0.6, "grad_norm": 2.5069386708867407, "learning_rate": 7.28076479122686e-06, "loss": 0.8501, "step": 3920 }, { "epoch": 0.6, "grad_norm": 2.517053657035505, "learning_rate": 7.27599439198482e-06, "loss": 0.7897, "step": 3921 }, { "epoch": 0.6, "grad_norm": 2.8360269490317953, "learning_rate": 7.2712246622269615e-06, "loss": 0.8144, "step": 3922 }, { "epoch": 0.6, "grad_norm": 2.9258561023362706, "learning_rate": 7.266455603125546e-06, "loss": 0.8818, "step": 3923 }, { "epoch": 0.6, "grad_norm": 2.6528039617127424, "learning_rate": 7.261687215852681e-06, "loss": 0.7645, "step": 3924 }, { "epoch": 0.6, "grad_norm": 2.935343617366732, "learning_rate": 7.2569195015803e-06, "loss": 0.8296, "step": 3925 }, { "epoch": 0.6, "grad_norm": 2.4898009977984827, "learning_rate": 7.252152461480166e-06, "loss": 0.7976, "step": 3926 }, { "epoch": 0.6, "grad_norm": 4.576448374761486, "learning_rate": 7.247386096723899e-06, "loss": 0.9198, "step": 3927 }, { "epoch": 0.6, "grad_norm": 2.387055416933314, "learning_rate": 7.2426204084829225e-06, "loss": 0.7474, "step": 3928 }, { "epoch": 0.6, "grad_norm": 2.6562445306220006, "learning_rate": 7.237855397928522e-06, "loss": 0.939, "step": 3929 }, { "epoch": 0.6, "grad_norm": 2.4856046326947054, "learning_rate": 7.233091066231796e-06, "loss": 0.7783, "step": 3930 }, { "epoch": 0.6, "grad_norm": 2.5341068263640323, "learning_rate": 7.228327414563683e-06, "loss": 0.8302, "step": 3931 }, { "epoch": 0.6, "grad_norm": 2.6776321393981934, "learning_rate": 7.223564444094959e-06, "loss": 0.7617, "step": 3932 }, { "epoch": 0.6, "grad_norm": 2.6597687482328407, "learning_rate": 7.218802155996227e-06, "loss": 0.8918, "step": 3933 }, { "epoch": 0.6, "grad_norm": 3.0271167434689525, "learning_rate": 7.214040551437919e-06, "loss": 0.9506, "step": 3934 }, { "epoch": 0.6, "grad_norm": 2.5823896739606136, "learning_rate": 7.209279631590313e-06, "loss": 0.7003, "step": 3935 }, { "epoch": 0.6, "grad_norm": 2.748517455966857, "learning_rate": 7.204519397623502e-06, "loss": 0.8306, "step": 3936 }, { "epoch": 0.6, "grad_norm": 2.612978903513092, "learning_rate": 7.199759850707424e-06, "loss": 0.7482, "step": 3937 }, { "epoch": 0.6, "grad_norm": 2.6809929088899294, "learning_rate": 7.195000992011838e-06, "loss": 0.7985, "step": 3938 }, { "epoch": 0.6, "grad_norm": 2.5825987471777534, "learning_rate": 7.190242822706335e-06, "loss": 0.7688, "step": 3939 }, { "epoch": 0.6, "grad_norm": 2.5087445915228046, "learning_rate": 7.185485343960348e-06, "loss": 0.7756, "step": 3940 }, { "epoch": 0.6, "grad_norm": 2.6229532585798303, "learning_rate": 7.180728556943128e-06, "loss": 0.8737, "step": 3941 }, { "epoch": 0.6, "grad_norm": 2.8277366555711994, "learning_rate": 7.175972462823757e-06, "loss": 0.739, "step": 3942 }, { "epoch": 0.6, "grad_norm": 3.6299431073851434, "learning_rate": 7.171217062771153e-06, "loss": 0.899, "step": 3943 }, { "epoch": 0.6, "grad_norm": 2.7129768358506614, "learning_rate": 7.16646235795406e-06, "loss": 0.8835, "step": 3944 }, { "epoch": 0.6, "grad_norm": 2.5883181574655696, "learning_rate": 7.161708349541044e-06, "loss": 0.8523, "step": 3945 }, { "epoch": 0.6, "grad_norm": 2.5426023684011723, "learning_rate": 7.156955038700519e-06, "loss": 0.7801, "step": 3946 }, { "epoch": 0.6, "grad_norm": 3.0473689365131404, "learning_rate": 7.1522024266007065e-06, "loss": 0.793, "step": 3947 }, { "epoch": 0.6, "grad_norm": 2.7815288430966674, "learning_rate": 7.14745051440967e-06, "loss": 0.9378, "step": 3948 }, { "epoch": 0.6, "grad_norm": 2.978236151092738, "learning_rate": 7.1426993032952926e-06, "loss": 0.9328, "step": 3949 }, { "epoch": 0.6, "grad_norm": 2.7000413945650714, "learning_rate": 7.137948794425289e-06, "loss": 0.8611, "step": 3950 }, { "epoch": 0.6, "grad_norm": 2.851811931723268, "learning_rate": 7.133198988967205e-06, "loss": 0.835, "step": 3951 }, { "epoch": 0.6, "grad_norm": 2.785889191122522, "learning_rate": 7.128449888088409e-06, "loss": 0.7782, "step": 3952 }, { "epoch": 0.61, "grad_norm": 2.7193290149705818, "learning_rate": 7.12370149295609e-06, "loss": 0.8046, "step": 3953 }, { "epoch": 0.61, "grad_norm": 2.643197344893586, "learning_rate": 7.118953804737278e-06, "loss": 0.888, "step": 3954 }, { "epoch": 0.61, "grad_norm": 2.64274182536652, "learning_rate": 7.1142068245988216e-06, "loss": 0.8475, "step": 3955 }, { "epoch": 0.61, "grad_norm": 2.509853365664716, "learning_rate": 7.109460553707386e-06, "loss": 0.8149, "step": 3956 }, { "epoch": 0.61, "grad_norm": 2.403526176543108, "learning_rate": 7.104714993229482e-06, "loss": 0.6655, "step": 3957 }, { "epoch": 0.61, "grad_norm": 2.7434615498012906, "learning_rate": 7.0999701443314295e-06, "loss": 0.7385, "step": 3958 }, { "epoch": 0.61, "grad_norm": 2.697601809689983, "learning_rate": 7.095226008179384e-06, "loss": 0.7553, "step": 3959 }, { "epoch": 0.61, "grad_norm": 5.203386630427518, "learning_rate": 7.090482585939318e-06, "loss": 0.9734, "step": 3960 }, { "epoch": 0.61, "grad_norm": 2.8165695481632227, "learning_rate": 7.085739878777027e-06, "loss": 0.8349, "step": 3961 }, { "epoch": 0.61, "grad_norm": 4.609275504845507, "learning_rate": 7.080997887858145e-06, "loss": 0.9396, "step": 3962 }, { "epoch": 0.61, "grad_norm": 2.8280813499025252, "learning_rate": 7.076256614348116e-06, "loss": 0.8813, "step": 3963 }, { "epoch": 0.61, "grad_norm": 2.3852433919170135, "learning_rate": 7.07151605941221e-06, "loss": 0.741, "step": 3964 }, { "epoch": 0.61, "grad_norm": 2.288825295359347, "learning_rate": 7.066776224215526e-06, "loss": 0.6835, "step": 3965 }, { "epoch": 0.61, "grad_norm": 2.628680125563155, "learning_rate": 7.06203710992298e-06, "loss": 0.8354, "step": 3966 }, { "epoch": 0.61, "grad_norm": 2.643593615500088, "learning_rate": 7.05729871769931e-06, "loss": 0.8535, "step": 3967 }, { "epoch": 0.61, "grad_norm": 2.676690991946433, "learning_rate": 7.052561048709089e-06, "loss": 0.8382, "step": 3968 }, { "epoch": 0.61, "grad_norm": 2.4738463303615092, "learning_rate": 7.0478241041166964e-06, "loss": 0.8537, "step": 3969 }, { "epoch": 0.61, "grad_norm": 2.7570416706638268, "learning_rate": 7.043087885086343e-06, "loss": 0.8763, "step": 3970 }, { "epoch": 0.61, "grad_norm": 2.927495685980187, "learning_rate": 7.038352392782057e-06, "loss": 0.7795, "step": 3971 }, { "epoch": 0.61, "grad_norm": 2.6282299744327693, "learning_rate": 7.033617628367688e-06, "loss": 0.8111, "step": 3972 }, { "epoch": 0.61, "grad_norm": 2.4653497111160387, "learning_rate": 7.028883593006914e-06, "loss": 0.8788, "step": 3973 }, { "epoch": 0.61, "grad_norm": 2.521042858506162, "learning_rate": 7.024150287863225e-06, "loss": 0.8709, "step": 3974 }, { "epoch": 0.61, "grad_norm": 2.931058675577534, "learning_rate": 7.019417714099933e-06, "loss": 0.9379, "step": 3975 }, { "epoch": 0.61, "grad_norm": 2.8664809412726044, "learning_rate": 7.014685872880175e-06, "loss": 0.7604, "step": 3976 }, { "epoch": 0.61, "grad_norm": 2.683922110588336, "learning_rate": 7.009954765366902e-06, "loss": 0.8586, "step": 3977 }, { "epoch": 0.61, "grad_norm": 2.467799967600729, "learning_rate": 7.005224392722886e-06, "loss": 0.7699, "step": 3978 }, { "epoch": 0.61, "grad_norm": 2.5448159972295774, "learning_rate": 7.000494756110726e-06, "loss": 0.8426, "step": 3979 }, { "epoch": 0.61, "grad_norm": 2.6894278394301554, "learning_rate": 6.995765856692829e-06, "loss": 0.8512, "step": 3980 }, { "epoch": 0.61, "grad_norm": 2.53178759299369, "learning_rate": 6.991037695631429e-06, "loss": 0.8591, "step": 3981 }, { "epoch": 0.61, "grad_norm": 2.718043311800521, "learning_rate": 6.986310274088574e-06, "loss": 0.8791, "step": 3982 }, { "epoch": 0.61, "grad_norm": 2.573133728510007, "learning_rate": 6.981583593226129e-06, "loss": 0.773, "step": 3983 }, { "epoch": 0.61, "grad_norm": 2.3568678578730933, "learning_rate": 6.976857654205784e-06, "loss": 0.7653, "step": 3984 }, { "epoch": 0.61, "grad_norm": 2.69196387885202, "learning_rate": 6.972132458189041e-06, "loss": 0.844, "step": 3985 }, { "epoch": 0.61, "grad_norm": 2.619016705936952, "learning_rate": 6.96740800633722e-06, "loss": 0.8745, "step": 3986 }, { "epoch": 0.61, "grad_norm": 2.5620443105698874, "learning_rate": 6.962684299811458e-06, "loss": 0.8514, "step": 3987 }, { "epoch": 0.61, "grad_norm": 2.693222913880473, "learning_rate": 6.957961339772712e-06, "loss": 0.898, "step": 3988 }, { "epoch": 0.61, "grad_norm": 2.8891906623178865, "learning_rate": 6.953239127381747e-06, "loss": 0.8755, "step": 3989 }, { "epoch": 0.61, "grad_norm": 2.519827989426567, "learning_rate": 6.948517663799159e-06, "loss": 0.8378, "step": 3990 }, { "epoch": 0.61, "grad_norm": 2.6792360398650534, "learning_rate": 6.943796950185344e-06, "loss": 0.727, "step": 3991 }, { "epoch": 0.61, "grad_norm": 3.323976172597204, "learning_rate": 6.939076987700528e-06, "loss": 0.826, "step": 3992 }, { "epoch": 0.61, "grad_norm": 2.8847629787119793, "learning_rate": 6.934357777504741e-06, "loss": 0.8186, "step": 3993 }, { "epoch": 0.61, "grad_norm": 2.5969932789141064, "learning_rate": 6.929639320757829e-06, "loss": 0.7931, "step": 3994 }, { "epoch": 0.61, "grad_norm": 3.013839101459728, "learning_rate": 6.924921618619465e-06, "loss": 0.8448, "step": 3995 }, { "epoch": 0.61, "grad_norm": 2.8119454609352896, "learning_rate": 6.920204672249122e-06, "loss": 0.8568, "step": 3996 }, { "epoch": 0.61, "grad_norm": 2.555707248166815, "learning_rate": 6.915488482806094e-06, "loss": 0.7189, "step": 3997 }, { "epoch": 0.61, "grad_norm": 2.626316012005687, "learning_rate": 6.9107730514494905e-06, "loss": 0.891, "step": 3998 }, { "epoch": 0.61, "grad_norm": 2.6351268057180492, "learning_rate": 6.906058379338229e-06, "loss": 0.8713, "step": 3999 }, { "epoch": 0.61, "grad_norm": 2.8765134930506817, "learning_rate": 6.901344467631041e-06, "loss": 0.8725, "step": 4000 }, { "epoch": 0.61, "grad_norm": 2.423697084064312, "learning_rate": 6.896631317486484e-06, "loss": 0.7581, "step": 4001 }, { "epoch": 0.61, "grad_norm": 2.773687716759275, "learning_rate": 6.891918930062908e-06, "loss": 0.7716, "step": 4002 }, { "epoch": 0.61, "grad_norm": 3.0364915828207826, "learning_rate": 6.887207306518493e-06, "loss": 0.753, "step": 4003 }, { "epoch": 0.61, "grad_norm": 2.421734376856012, "learning_rate": 6.8824964480112185e-06, "loss": 0.8384, "step": 4004 }, { "epoch": 0.61, "grad_norm": 2.8184012050044376, "learning_rate": 6.877786355698881e-06, "loss": 0.8738, "step": 4005 }, { "epoch": 0.61, "grad_norm": 2.7558547785075453, "learning_rate": 6.873077030739095e-06, "loss": 0.7818, "step": 4006 }, { "epoch": 0.61, "grad_norm": 2.495768634000435, "learning_rate": 6.868368474289278e-06, "loss": 0.8092, "step": 4007 }, { "epoch": 0.61, "grad_norm": 3.026842608734279, "learning_rate": 6.8636606875066556e-06, "loss": 0.8685, "step": 4008 }, { "epoch": 0.61, "grad_norm": 2.495028706880457, "learning_rate": 6.8589536715482786e-06, "loss": 0.7991, "step": 4009 }, { "epoch": 0.61, "grad_norm": 3.109487577384096, "learning_rate": 6.8542474275709906e-06, "loss": 0.8021, "step": 4010 }, { "epoch": 0.61, "grad_norm": 2.5455720562557658, "learning_rate": 6.8495419567314625e-06, "loss": 0.8722, "step": 4011 }, { "epoch": 0.61, "grad_norm": 2.513268664427139, "learning_rate": 6.844837260186164e-06, "loss": 0.8785, "step": 4012 }, { "epoch": 0.61, "grad_norm": 3.050419928001916, "learning_rate": 6.840133339091375e-06, "loss": 0.7724, "step": 4013 }, { "epoch": 0.61, "grad_norm": 2.6074989364288426, "learning_rate": 6.835430194603191e-06, "loss": 0.8731, "step": 4014 }, { "epoch": 0.61, "grad_norm": 2.43468203557067, "learning_rate": 6.8307278278775125e-06, "loss": 0.9025, "step": 4015 }, { "epoch": 0.61, "grad_norm": 2.824045907126482, "learning_rate": 6.826026240070043e-06, "loss": 0.7948, "step": 4016 }, { "epoch": 0.61, "grad_norm": 2.730762641642708, "learning_rate": 6.821325432336311e-06, "loss": 0.812, "step": 4017 }, { "epoch": 0.62, "grad_norm": 2.531912929558859, "learning_rate": 6.816625405831638e-06, "loss": 0.7354, "step": 4018 }, { "epoch": 0.62, "grad_norm": 2.572901191167967, "learning_rate": 6.811926161711157e-06, "loss": 0.8102, "step": 4019 }, { "epoch": 0.62, "grad_norm": 2.726521586567872, "learning_rate": 6.807227701129816e-06, "loss": 0.8618, "step": 4020 }, { "epoch": 0.62, "grad_norm": 2.634553428080092, "learning_rate": 6.802530025242356e-06, "loss": 0.8396, "step": 4021 }, { "epoch": 0.62, "grad_norm": 2.7548342410377984, "learning_rate": 6.797833135203345e-06, "loss": 0.8199, "step": 4022 }, { "epoch": 0.62, "grad_norm": 2.6846160687305054, "learning_rate": 6.79313703216714e-06, "loss": 0.8959, "step": 4023 }, { "epoch": 0.62, "grad_norm": 2.8043730901195687, "learning_rate": 6.7884417172879104e-06, "loss": 0.7931, "step": 4024 }, { "epoch": 0.62, "grad_norm": 2.545970678283305, "learning_rate": 6.783747191719637e-06, "loss": 0.7749, "step": 4025 }, { "epoch": 0.62, "grad_norm": 2.5451689723358326, "learning_rate": 6.779053456616102e-06, "loss": 0.7443, "step": 4026 }, { "epoch": 0.62, "grad_norm": 4.529972058083067, "learning_rate": 6.774360513130888e-06, "loss": 0.975, "step": 4027 }, { "epoch": 0.62, "grad_norm": 2.611937243351216, "learning_rate": 6.769668362417397e-06, "loss": 0.9137, "step": 4028 }, { "epoch": 0.62, "grad_norm": 2.644050252900844, "learning_rate": 6.7649770056288245e-06, "loss": 0.8149, "step": 4029 }, { "epoch": 0.62, "grad_norm": 2.504733855409512, "learning_rate": 6.760286443918172e-06, "loss": 0.8504, "step": 4030 }, { "epoch": 0.62, "grad_norm": 3.8987650988567344, "learning_rate": 6.755596678438253e-06, "loss": 0.9102, "step": 4031 }, { "epoch": 0.62, "grad_norm": 2.6816926813216684, "learning_rate": 6.750907710341674e-06, "loss": 0.777, "step": 4032 }, { "epoch": 0.62, "grad_norm": 2.568770879329872, "learning_rate": 6.746219540780859e-06, "loss": 0.8053, "step": 4033 }, { "epoch": 0.62, "grad_norm": 2.6798556604625077, "learning_rate": 6.7415321709080254e-06, "loss": 0.9544, "step": 4034 }, { "epoch": 0.62, "grad_norm": 2.558759654519992, "learning_rate": 6.736845601875195e-06, "loss": 0.7916, "step": 4035 }, { "epoch": 0.62, "grad_norm": 2.85191964706347, "learning_rate": 6.7321598348342e-06, "loss": 0.7775, "step": 4036 }, { "epoch": 0.62, "grad_norm": 2.699746268096858, "learning_rate": 6.727474870936667e-06, "loss": 0.8829, "step": 4037 }, { "epoch": 0.62, "grad_norm": 2.6224212365035386, "learning_rate": 6.722790711334024e-06, "loss": 0.7747, "step": 4038 }, { "epoch": 0.62, "grad_norm": 2.891059389978791, "learning_rate": 6.7181073571775165e-06, "loss": 0.7886, "step": 4039 }, { "epoch": 0.62, "grad_norm": 2.623580531520144, "learning_rate": 6.713424809618176e-06, "loss": 0.8556, "step": 4040 }, { "epoch": 0.62, "grad_norm": 2.6496413746498084, "learning_rate": 6.708743069806839e-06, "loss": 0.8466, "step": 4041 }, { "epoch": 0.62, "grad_norm": 2.9920543365646664, "learning_rate": 6.704062138894154e-06, "loss": 0.7497, "step": 4042 }, { "epoch": 0.62, "grad_norm": 2.4311107806698398, "learning_rate": 6.699382018030551e-06, "loss": 0.8919, "step": 4043 }, { "epoch": 0.62, "grad_norm": 2.668935722922524, "learning_rate": 6.694702708366283e-06, "loss": 0.8383, "step": 4044 }, { "epoch": 0.62, "grad_norm": 2.6708486158887257, "learning_rate": 6.690024211051389e-06, "loss": 0.8418, "step": 4045 }, { "epoch": 0.62, "grad_norm": 2.611173432896564, "learning_rate": 6.685346527235707e-06, "loss": 0.778, "step": 4046 }, { "epoch": 0.62, "grad_norm": 2.724855065721752, "learning_rate": 6.68066965806889e-06, "loss": 0.8278, "step": 4047 }, { "epoch": 0.62, "grad_norm": 2.749989817656343, "learning_rate": 6.67599360470038e-06, "loss": 0.7917, "step": 4048 }, { "epoch": 0.62, "grad_norm": 2.516759813149189, "learning_rate": 6.6713183682794104e-06, "loss": 0.7597, "step": 4049 }, { "epoch": 0.62, "grad_norm": 2.700800674394037, "learning_rate": 6.666643949955033e-06, "loss": 0.9328, "step": 4050 }, { "epoch": 0.62, "grad_norm": 2.3983122261201006, "learning_rate": 6.6619703508760855e-06, "loss": 0.7261, "step": 4051 }, { "epoch": 0.62, "grad_norm": 2.748672614839632, "learning_rate": 6.657297572191202e-06, "loss": 0.8521, "step": 4052 }, { "epoch": 0.62, "grad_norm": 2.6261395166600905, "learning_rate": 6.652625615048831e-06, "loss": 0.8024, "step": 4053 }, { "epoch": 0.62, "grad_norm": 2.4759735826351665, "learning_rate": 6.6479544805972e-06, "loss": 0.776, "step": 4054 }, { "epoch": 0.62, "grad_norm": 2.506862194790913, "learning_rate": 6.64328416998435e-06, "loss": 0.7892, "step": 4055 }, { "epoch": 0.62, "grad_norm": 2.525522899555866, "learning_rate": 6.6386146843581075e-06, "loss": 0.76, "step": 4056 }, { "epoch": 0.62, "grad_norm": 2.90535564554283, "learning_rate": 6.633946024866098e-06, "loss": 0.8547, "step": 4057 }, { "epoch": 0.62, "grad_norm": 2.8295321791625, "learning_rate": 6.629278192655755e-06, "loss": 0.8736, "step": 4058 }, { "epoch": 0.62, "grad_norm": 2.8648454263022014, "learning_rate": 6.624611188874297e-06, "loss": 0.8359, "step": 4059 }, { "epoch": 0.62, "grad_norm": 2.4725522208039368, "learning_rate": 6.619945014668741e-06, "loss": 0.829, "step": 4060 }, { "epoch": 0.62, "grad_norm": 2.924624596128136, "learning_rate": 6.615279671185904e-06, "loss": 0.783, "step": 4061 }, { "epoch": 0.62, "grad_norm": 2.308448959924426, "learning_rate": 6.6106151595723955e-06, "loss": 0.6837, "step": 4062 }, { "epoch": 0.62, "grad_norm": 2.7063686985335793, "learning_rate": 6.605951480974618e-06, "loss": 0.7703, "step": 4063 }, { "epoch": 0.62, "grad_norm": 2.6589391223667045, "learning_rate": 6.601288636538782e-06, "loss": 0.8667, "step": 4064 }, { "epoch": 0.62, "grad_norm": 2.544162074879167, "learning_rate": 6.596626627410876e-06, "loss": 0.7775, "step": 4065 }, { "epoch": 0.62, "grad_norm": 2.7523919460415818, "learning_rate": 6.591965454736695e-06, "loss": 0.8147, "step": 4066 }, { "epoch": 0.62, "grad_norm": 4.008385669960899, "learning_rate": 6.587305119661824e-06, "loss": 0.9105, "step": 4067 }, { "epoch": 0.62, "grad_norm": 2.769938732126488, "learning_rate": 6.582645623331638e-06, "loss": 0.8769, "step": 4068 }, { "epoch": 0.62, "grad_norm": 2.67656946337048, "learning_rate": 6.577986966891319e-06, "loss": 0.8576, "step": 4069 }, { "epoch": 0.62, "grad_norm": 2.917024733414259, "learning_rate": 6.57332915148583e-06, "loss": 0.6818, "step": 4070 }, { "epoch": 0.62, "grad_norm": 2.5314665259462976, "learning_rate": 6.568672178259927e-06, "loss": 0.7612, "step": 4071 }, { "epoch": 0.62, "grad_norm": 2.767389303924322, "learning_rate": 6.564016048358171e-06, "loss": 0.7915, "step": 4072 }, { "epoch": 0.62, "grad_norm": 2.5813872988695876, "learning_rate": 6.559360762924905e-06, "loss": 0.8387, "step": 4073 }, { "epoch": 0.62, "grad_norm": 2.626198012687205, "learning_rate": 6.5547063231042616e-06, "loss": 0.8293, "step": 4074 }, { "epoch": 0.62, "grad_norm": 2.616346063221364, "learning_rate": 6.550052730040182e-06, "loss": 0.904, "step": 4075 }, { "epoch": 0.62, "grad_norm": 2.554419912580466, "learning_rate": 6.545399984876382e-06, "loss": 0.8675, "step": 4076 }, { "epoch": 0.62, "grad_norm": 2.5640788502246457, "learning_rate": 6.540748088756379e-06, "loss": 0.7613, "step": 4077 }, { "epoch": 0.62, "grad_norm": 2.704161443937231, "learning_rate": 6.536097042823478e-06, "loss": 0.8895, "step": 4078 }, { "epoch": 0.62, "grad_norm": 2.6358909379667366, "learning_rate": 6.53144684822077e-06, "loss": 0.7766, "step": 4079 }, { "epoch": 0.62, "grad_norm": 2.941858378301113, "learning_rate": 6.5267975060911534e-06, "loss": 0.8079, "step": 4080 }, { "epoch": 0.62, "grad_norm": 2.515359174202313, "learning_rate": 6.5221490175772996e-06, "loss": 0.7194, "step": 4081 }, { "epoch": 0.62, "grad_norm": 2.586250039707266, "learning_rate": 6.517501383821675e-06, "loss": 0.7522, "step": 4082 }, { "epoch": 0.62, "grad_norm": 2.6503763756279386, "learning_rate": 6.512854605966542e-06, "loss": 0.8073, "step": 4083 }, { "epoch": 0.63, "grad_norm": 2.6301952442230596, "learning_rate": 6.508208685153943e-06, "loss": 0.8574, "step": 4084 }, { "epoch": 0.63, "grad_norm": 2.6460918372899784, "learning_rate": 6.5035636225257206e-06, "loss": 0.8026, "step": 4085 }, { "epoch": 0.63, "grad_norm": 2.6467874751339475, "learning_rate": 6.4989194192235e-06, "loss": 0.7865, "step": 4086 }, { "epoch": 0.63, "grad_norm": 2.53926613183767, "learning_rate": 6.4942760763886906e-06, "loss": 0.8668, "step": 4087 }, { "epoch": 0.63, "grad_norm": 2.4622835484736094, "learning_rate": 6.489633595162503e-06, "loss": 0.8309, "step": 4088 }, { "epoch": 0.63, "grad_norm": 2.7848796523663166, "learning_rate": 6.484991976685927e-06, "loss": 0.8824, "step": 4089 }, { "epoch": 0.63, "grad_norm": 3.014091740841856, "learning_rate": 6.4803512220997366e-06, "loss": 0.9, "step": 4090 }, { "epoch": 0.63, "grad_norm": 2.4966799499927443, "learning_rate": 6.475711332544506e-06, "loss": 0.8105, "step": 4091 }, { "epoch": 0.63, "grad_norm": 2.56373881046293, "learning_rate": 6.471072309160589e-06, "loss": 0.8666, "step": 4092 }, { "epoch": 0.63, "grad_norm": 3.102359688208209, "learning_rate": 6.466434153088124e-06, "loss": 0.8941, "step": 4093 }, { "epoch": 0.63, "grad_norm": 2.506941106145849, "learning_rate": 6.461796865467043e-06, "loss": 0.8177, "step": 4094 }, { "epoch": 0.63, "grad_norm": 2.446841206056606, "learning_rate": 6.457160447437055e-06, "loss": 0.8387, "step": 4095 }, { "epoch": 0.63, "grad_norm": 3.039755545522044, "learning_rate": 6.452524900137671e-06, "loss": 0.9508, "step": 4096 }, { "epoch": 0.63, "grad_norm": 2.960153109907913, "learning_rate": 6.447890224708175e-06, "loss": 0.8863, "step": 4097 }, { "epoch": 0.63, "grad_norm": 3.8572295562041097, "learning_rate": 6.443256422287636e-06, "loss": 0.8931, "step": 4098 }, { "epoch": 0.63, "grad_norm": 2.5975817640776717, "learning_rate": 6.4386234940149175e-06, "loss": 0.7323, "step": 4099 }, { "epoch": 0.63, "grad_norm": 2.585156872911483, "learning_rate": 6.433991441028662e-06, "loss": 0.8293, "step": 4100 }, { "epoch": 0.63, "grad_norm": 2.649456417121931, "learning_rate": 6.429360264467295e-06, "loss": 0.8892, "step": 4101 }, { "epoch": 0.63, "grad_norm": 2.8639255281877456, "learning_rate": 6.424729965469035e-06, "loss": 0.9024, "step": 4102 }, { "epoch": 0.63, "grad_norm": 2.780003960122886, "learning_rate": 6.420100545171878e-06, "loss": 0.8657, "step": 4103 }, { "epoch": 0.63, "grad_norm": 2.509960135382255, "learning_rate": 6.4154720047136036e-06, "loss": 0.7157, "step": 4104 }, { "epoch": 0.63, "grad_norm": 2.5979157879958343, "learning_rate": 6.4108443452317795e-06, "loss": 0.8317, "step": 4105 }, { "epoch": 0.63, "grad_norm": 2.4868039343347768, "learning_rate": 6.40621756786375e-06, "loss": 0.8063, "step": 4106 }, { "epoch": 0.63, "grad_norm": 3.2123728570860846, "learning_rate": 6.401591673746654e-06, "loss": 0.7474, "step": 4107 }, { "epoch": 0.63, "grad_norm": 2.8195349629081465, "learning_rate": 6.396966664017406e-06, "loss": 0.8592, "step": 4108 }, { "epoch": 0.63, "grad_norm": 2.74887243369275, "learning_rate": 6.392342539812697e-06, "loss": 0.8349, "step": 4109 }, { "epoch": 0.63, "grad_norm": 2.414885397862542, "learning_rate": 6.387719302269013e-06, "loss": 0.8437, "step": 4110 }, { "epoch": 0.63, "grad_norm": 2.8044942835656865, "learning_rate": 6.383096952522616e-06, "loss": 0.8438, "step": 4111 }, { "epoch": 0.63, "grad_norm": 2.4437840839634037, "learning_rate": 6.378475491709543e-06, "loss": 0.8465, "step": 4112 }, { "epoch": 0.63, "grad_norm": 2.5651394312215334, "learning_rate": 6.373854920965629e-06, "loss": 0.8432, "step": 4113 }, { "epoch": 0.63, "grad_norm": 2.4733310108162367, "learning_rate": 6.369235241426475e-06, "loss": 0.7228, "step": 4114 }, { "epoch": 0.63, "grad_norm": 3.0439512584691006, "learning_rate": 6.3646164542274705e-06, "loss": 0.7518, "step": 4115 }, { "epoch": 0.63, "grad_norm": 2.9206726657813906, "learning_rate": 6.359998560503784e-06, "loss": 0.7668, "step": 4116 }, { "epoch": 0.63, "grad_norm": 2.985452574642286, "learning_rate": 6.355381561390359e-06, "loss": 0.7956, "step": 4117 }, { "epoch": 0.63, "grad_norm": 2.550473017214655, "learning_rate": 6.350765458021935e-06, "loss": 0.7408, "step": 4118 }, { "epoch": 0.63, "grad_norm": 2.5381264489602375, "learning_rate": 6.346150251533013e-06, "loss": 0.8889, "step": 4119 }, { "epoch": 0.63, "grad_norm": 2.575632216288596, "learning_rate": 6.341535943057882e-06, "loss": 0.8932, "step": 4120 }, { "epoch": 0.63, "grad_norm": 2.4305923233576925, "learning_rate": 6.336922533730611e-06, "loss": 0.7149, "step": 4121 }, { "epoch": 0.63, "grad_norm": 2.542358361284761, "learning_rate": 6.332310024685049e-06, "loss": 0.7948, "step": 4122 }, { "epoch": 0.63, "grad_norm": 2.8751522589498832, "learning_rate": 6.327698417054814e-06, "loss": 0.8883, "step": 4123 }, { "epoch": 0.63, "grad_norm": 2.4987312069013172, "learning_rate": 6.3230877119733184e-06, "loss": 0.8417, "step": 4124 }, { "epoch": 0.63, "grad_norm": 2.626332516673871, "learning_rate": 6.318477910573739e-06, "loss": 0.8392, "step": 4125 }, { "epoch": 0.63, "grad_norm": 2.884330720549634, "learning_rate": 6.313869013989037e-06, "loss": 0.8784, "step": 4126 }, { "epoch": 0.63, "grad_norm": 2.433399590467831, "learning_rate": 6.309261023351951e-06, "loss": 0.7685, "step": 4127 }, { "epoch": 0.63, "grad_norm": 4.234724727523963, "learning_rate": 6.304653939794991e-06, "loss": 0.9656, "step": 4128 }, { "epoch": 0.63, "grad_norm": 3.7665147719725556, "learning_rate": 6.300047764450456e-06, "loss": 0.8758, "step": 4129 }, { "epoch": 0.63, "grad_norm": 2.6819728229640902, "learning_rate": 6.295442498450413e-06, "loss": 0.8419, "step": 4130 }, { "epoch": 0.63, "grad_norm": 2.405193789764089, "learning_rate": 6.290838142926702e-06, "loss": 0.7792, "step": 4131 }, { "epoch": 0.63, "grad_norm": 2.5564651239366065, "learning_rate": 6.286234699010951e-06, "loss": 0.7418, "step": 4132 }, { "epoch": 0.63, "grad_norm": 2.547961536655596, "learning_rate": 6.281632167834555e-06, "loss": 0.8122, "step": 4133 }, { "epoch": 0.63, "grad_norm": 2.579679050497923, "learning_rate": 6.277030550528681e-06, "loss": 0.8219, "step": 4134 }, { "epoch": 0.63, "grad_norm": 2.6577361052780657, "learning_rate": 6.2724298482242885e-06, "loss": 0.843, "step": 4135 }, { "epoch": 0.63, "grad_norm": 2.796823419353427, "learning_rate": 6.267830062052095e-06, "loss": 0.8328, "step": 4136 }, { "epoch": 0.63, "grad_norm": 2.675985550773138, "learning_rate": 6.263231193142598e-06, "loss": 0.7838, "step": 4137 }, { "epoch": 0.63, "grad_norm": 2.5707725044542515, "learning_rate": 6.258633242626073e-06, "loss": 0.8262, "step": 4138 }, { "epoch": 0.63, "grad_norm": 2.813092623388354, "learning_rate": 6.254036211632563e-06, "loss": 0.8425, "step": 4139 }, { "epoch": 0.63, "grad_norm": 2.568730615011392, "learning_rate": 6.249440101291898e-06, "loss": 0.7452, "step": 4140 }, { "epoch": 0.63, "grad_norm": 4.755377671537752, "learning_rate": 6.244844912733667e-06, "loss": 0.8829, "step": 4141 }, { "epoch": 0.63, "grad_norm": 2.558947372504722, "learning_rate": 6.240250647087236e-06, "loss": 0.897, "step": 4142 }, { "epoch": 0.63, "grad_norm": 2.745447153058319, "learning_rate": 6.235657305481752e-06, "loss": 0.8688, "step": 4143 }, { "epoch": 0.63, "grad_norm": 2.647907903816932, "learning_rate": 6.23106488904613e-06, "loss": 0.8379, "step": 4144 }, { "epoch": 0.63, "grad_norm": 2.790647314327554, "learning_rate": 6.2264733989090475e-06, "loss": 0.8742, "step": 4145 }, { "epoch": 0.63, "grad_norm": 2.9750857386672114, "learning_rate": 6.221882836198977e-06, "loss": 0.8778, "step": 4146 }, { "epoch": 0.63, "grad_norm": 2.3547014311971215, "learning_rate": 6.217293202044143e-06, "loss": 0.6963, "step": 4147 }, { "epoch": 0.63, "grad_norm": 2.4512617810217496, "learning_rate": 6.212704497572548e-06, "loss": 0.7892, "step": 4148 }, { "epoch": 0.64, "grad_norm": 2.5369909191742073, "learning_rate": 6.208116723911971e-06, "loss": 0.7219, "step": 4149 }, { "epoch": 0.64, "grad_norm": 2.536979432046566, "learning_rate": 6.203529882189951e-06, "loss": 0.7174, "step": 4150 }, { "epoch": 0.64, "grad_norm": 2.452686560261575, "learning_rate": 6.198943973533816e-06, "loss": 0.7855, "step": 4151 }, { "epoch": 0.64, "grad_norm": 3.6527193305604295, "learning_rate": 6.194358999070646e-06, "loss": 0.7499, "step": 4152 }, { "epoch": 0.64, "grad_norm": 2.3863825447074682, "learning_rate": 6.189774959927297e-06, "loss": 0.744, "step": 4153 }, { "epoch": 0.64, "grad_norm": 2.5502961600790366, "learning_rate": 6.185191857230408e-06, "loss": 0.7702, "step": 4154 }, { "epoch": 0.64, "grad_norm": 2.777791413090332, "learning_rate": 6.180609692106368e-06, "loss": 0.7513, "step": 4155 }, { "epoch": 0.64, "grad_norm": 2.7429295672784546, "learning_rate": 6.176028465681343e-06, "loss": 0.7781, "step": 4156 }, { "epoch": 0.64, "grad_norm": 2.583623335809288, "learning_rate": 6.1714481790812765e-06, "loss": 0.7991, "step": 4157 }, { "epoch": 0.64, "grad_norm": 2.5019251528214976, "learning_rate": 6.166868833431869e-06, "loss": 0.7802, "step": 4158 }, { "epoch": 0.64, "grad_norm": 2.46943361864758, "learning_rate": 6.162290429858602e-06, "loss": 0.8392, "step": 4159 }, { "epoch": 0.64, "grad_norm": 2.697621406379056, "learning_rate": 6.157712969486716e-06, "loss": 0.7135, "step": 4160 }, { "epoch": 0.64, "grad_norm": 2.5361851349014426, "learning_rate": 6.153136453441216e-06, "loss": 0.7468, "step": 4161 }, { "epoch": 0.64, "grad_norm": 2.777669008265395, "learning_rate": 6.14856088284689e-06, "loss": 0.7782, "step": 4162 }, { "epoch": 0.64, "grad_norm": 2.676393263557243, "learning_rate": 6.14398625882828e-06, "loss": 0.8292, "step": 4163 }, { "epoch": 0.64, "grad_norm": 2.629987864288778, "learning_rate": 6.1394125825097005e-06, "loss": 0.7771, "step": 4164 }, { "epoch": 0.64, "grad_norm": 2.3706226588091357, "learning_rate": 6.134839855015235e-06, "loss": 0.7614, "step": 4165 }, { "epoch": 0.64, "grad_norm": 2.9096277892102456, "learning_rate": 6.1302680774687325e-06, "loss": 0.8288, "step": 4166 }, { "epoch": 0.64, "grad_norm": 2.266472116130424, "learning_rate": 6.125697250993804e-06, "loss": 0.617, "step": 4167 }, { "epoch": 0.64, "grad_norm": 2.546047394461752, "learning_rate": 6.1211273767138336e-06, "loss": 0.8736, "step": 4168 }, { "epoch": 0.64, "grad_norm": 2.7568097786899677, "learning_rate": 6.1165584557519634e-06, "loss": 0.7315, "step": 4169 }, { "epoch": 0.64, "grad_norm": 2.5725431683106645, "learning_rate": 6.111990489231114e-06, "loss": 0.7479, "step": 4170 }, { "epoch": 0.64, "grad_norm": 2.418434465044092, "learning_rate": 6.1074234782739576e-06, "loss": 0.8278, "step": 4171 }, { "epoch": 0.64, "grad_norm": 2.6811260628803524, "learning_rate": 6.102857424002937e-06, "loss": 0.8043, "step": 4172 }, { "epoch": 0.64, "grad_norm": 2.7969088357041114, "learning_rate": 6.098292327540265e-06, "loss": 0.8, "step": 4173 }, { "epoch": 0.64, "grad_norm": 2.8678984331762813, "learning_rate": 6.093728190007912e-06, "loss": 0.8598, "step": 4174 }, { "epoch": 0.64, "grad_norm": 2.7141740857093932, "learning_rate": 6.089165012527609e-06, "loss": 0.7699, "step": 4175 }, { "epoch": 0.64, "grad_norm": 2.7721712895307693, "learning_rate": 6.084602796220866e-06, "loss": 0.8596, "step": 4176 }, { "epoch": 0.64, "grad_norm": 2.534281013021705, "learning_rate": 6.080041542208946e-06, "loss": 0.7828, "step": 4177 }, { "epoch": 0.64, "grad_norm": 2.551375290088987, "learning_rate": 6.075481251612873e-06, "loss": 0.7576, "step": 4178 }, { "epoch": 0.64, "grad_norm": 2.8120490143121186, "learning_rate": 6.0709219255534424e-06, "loss": 0.8315, "step": 4179 }, { "epoch": 0.64, "grad_norm": 3.2619929727898964, "learning_rate": 6.066363565151203e-06, "loss": 0.8645, "step": 4180 }, { "epoch": 0.64, "grad_norm": 2.5383095550716197, "learning_rate": 6.061806171526482e-06, "loss": 0.7716, "step": 4181 }, { "epoch": 0.64, "grad_norm": 2.4670004389518843, "learning_rate": 6.0572497457993515e-06, "loss": 0.7649, "step": 4182 }, { "epoch": 0.64, "grad_norm": 2.2463743452604414, "learning_rate": 6.052694289089655e-06, "loss": 0.7303, "step": 4183 }, { "epoch": 0.64, "grad_norm": 2.8256884332350682, "learning_rate": 6.048139802516997e-06, "loss": 0.7508, "step": 4184 }, { "epoch": 0.64, "grad_norm": 2.6277561858184866, "learning_rate": 6.04358628720074e-06, "loss": 0.7525, "step": 4185 }, { "epoch": 0.64, "grad_norm": 2.788573749816516, "learning_rate": 6.039033744260009e-06, "loss": 0.8504, "step": 4186 }, { "epoch": 0.64, "grad_norm": 2.6760899491206636, "learning_rate": 6.034482174813698e-06, "loss": 0.8448, "step": 4187 }, { "epoch": 0.64, "grad_norm": 2.7226554524499753, "learning_rate": 6.0299315799804524e-06, "loss": 0.8442, "step": 4188 }, { "epoch": 0.64, "grad_norm": 2.5518099787008737, "learning_rate": 6.025381960878675e-06, "loss": 0.8306, "step": 4189 }, { "epoch": 0.64, "grad_norm": 2.696138047928778, "learning_rate": 6.020833318626544e-06, "loss": 0.8902, "step": 4190 }, { "epoch": 0.64, "grad_norm": 2.3993432050044063, "learning_rate": 6.016285654341978e-06, "loss": 0.7706, "step": 4191 }, { "epoch": 0.64, "grad_norm": 3.7765750937001203, "learning_rate": 6.011738969142676e-06, "loss": 0.8679, "step": 4192 }, { "epoch": 0.64, "grad_norm": 2.859266254500037, "learning_rate": 6.0071932641460784e-06, "loss": 0.8554, "step": 4193 }, { "epoch": 0.64, "grad_norm": 2.6019050994444024, "learning_rate": 6.002648540469394e-06, "loss": 0.7164, "step": 4194 }, { "epoch": 0.64, "grad_norm": 2.7344950289561343, "learning_rate": 5.9981047992295895e-06, "loss": 0.8349, "step": 4195 }, { "epoch": 0.64, "grad_norm": 2.612788730546887, "learning_rate": 5.993562041543388e-06, "loss": 0.8315, "step": 4196 }, { "epoch": 0.64, "grad_norm": 2.664873642699137, "learning_rate": 5.989020268527268e-06, "loss": 0.8662, "step": 4197 }, { "epoch": 0.64, "grad_norm": 2.7090021448831867, "learning_rate": 5.9844794812974784e-06, "loss": 0.8077, "step": 4198 }, { "epoch": 0.64, "grad_norm": 2.8042750369301563, "learning_rate": 5.979939680970012e-06, "loss": 0.8289, "step": 4199 }, { "epoch": 0.64, "grad_norm": 2.897760226237868, "learning_rate": 5.975400868660624e-06, "loss": 0.845, "step": 4200 }, { "epoch": 0.64, "grad_norm": 2.8252430374625055, "learning_rate": 5.970863045484829e-06, "loss": 0.8849, "step": 4201 }, { "epoch": 0.64, "grad_norm": 2.4574717176968277, "learning_rate": 5.966326212557892e-06, "loss": 0.9231, "step": 4202 }, { "epoch": 0.64, "grad_norm": 2.5705754869443163, "learning_rate": 5.9617903709948485e-06, "loss": 0.8147, "step": 4203 }, { "epoch": 0.64, "grad_norm": 2.559653301159418, "learning_rate": 5.957255521910477e-06, "loss": 0.8086, "step": 4204 }, { "epoch": 0.64, "grad_norm": 2.6067710988929536, "learning_rate": 5.952721666419311e-06, "loss": 0.7963, "step": 4205 }, { "epoch": 0.64, "grad_norm": 2.7621380129077964, "learning_rate": 5.948188805635652e-06, "loss": 0.8049, "step": 4206 }, { "epoch": 0.64, "grad_norm": 2.652662783063158, "learning_rate": 5.9436569406735475e-06, "loss": 0.798, "step": 4207 }, { "epoch": 0.64, "grad_norm": 2.742562653994274, "learning_rate": 5.939126072646798e-06, "loss": 0.8252, "step": 4208 }, { "epoch": 0.64, "grad_norm": 2.4686756050614465, "learning_rate": 5.934596202668973e-06, "loss": 0.7727, "step": 4209 }, { "epoch": 0.64, "grad_norm": 2.5693938881507408, "learning_rate": 5.930067331853382e-06, "loss": 0.8081, "step": 4210 }, { "epoch": 0.64, "grad_norm": 2.6406554355059035, "learning_rate": 5.925539461313095e-06, "loss": 0.6952, "step": 4211 }, { "epoch": 0.64, "grad_norm": 2.567357727216333, "learning_rate": 5.921012592160936e-06, "loss": 0.7968, "step": 4212 }, { "epoch": 0.64, "grad_norm": 3.549508744267877, "learning_rate": 5.916486725509479e-06, "loss": 0.9364, "step": 4213 }, { "epoch": 0.65, "grad_norm": 2.653890717917521, "learning_rate": 5.911961862471063e-06, "loss": 0.8733, "step": 4214 }, { "epoch": 0.65, "grad_norm": 2.5635645587348996, "learning_rate": 5.907438004157767e-06, "loss": 0.8015, "step": 4215 }, { "epoch": 0.65, "grad_norm": 2.901723645052626, "learning_rate": 5.902915151681427e-06, "loss": 0.8542, "step": 4216 }, { "epoch": 0.65, "grad_norm": 2.630135346802989, "learning_rate": 5.89839330615364e-06, "loss": 0.7924, "step": 4217 }, { "epoch": 0.65, "grad_norm": 2.9543974638939074, "learning_rate": 5.893872468685743e-06, "loss": 0.8472, "step": 4218 }, { "epoch": 0.65, "grad_norm": 2.608700762294027, "learning_rate": 5.889352640388828e-06, "loss": 0.7086, "step": 4219 }, { "epoch": 0.65, "grad_norm": 2.6845836414520825, "learning_rate": 5.884833822373751e-06, "loss": 0.8024, "step": 4220 }, { "epoch": 0.65, "grad_norm": 2.712954926371659, "learning_rate": 5.880316015751106e-06, "loss": 0.819, "step": 4221 }, { "epoch": 0.65, "grad_norm": 2.601280457477493, "learning_rate": 5.875799221631242e-06, "loss": 0.7722, "step": 4222 }, { "epoch": 0.65, "grad_norm": 2.523625947183782, "learning_rate": 5.871283441124264e-06, "loss": 0.8626, "step": 4223 }, { "epoch": 0.65, "grad_norm": 2.4552347893262985, "learning_rate": 5.866768675340018e-06, "loss": 0.7041, "step": 4224 }, { "epoch": 0.65, "grad_norm": 3.52178337670741, "learning_rate": 5.8622549253881135e-06, "loss": 0.8436, "step": 4225 }, { "epoch": 0.65, "grad_norm": 2.7843047744222957, "learning_rate": 5.8577421923779025e-06, "loss": 0.7545, "step": 4226 }, { "epoch": 0.65, "grad_norm": 2.7334770181269517, "learning_rate": 5.853230477418483e-06, "loss": 0.9143, "step": 4227 }, { "epoch": 0.65, "grad_norm": 2.551804593077235, "learning_rate": 5.8487197816187145e-06, "loss": 0.7694, "step": 4228 }, { "epoch": 0.65, "grad_norm": 3.827479143157379, "learning_rate": 5.844210106087198e-06, "loss": 0.8784, "step": 4229 }, { "epoch": 0.65, "grad_norm": 2.6859822414158914, "learning_rate": 5.839701451932278e-06, "loss": 0.7843, "step": 4230 }, { "epoch": 0.65, "grad_norm": 2.5468246556889764, "learning_rate": 5.8351938202620666e-06, "loss": 0.862, "step": 4231 }, { "epoch": 0.65, "grad_norm": 4.308901391630756, "learning_rate": 5.830687212184407e-06, "loss": 0.8985, "step": 4232 }, { "epoch": 0.65, "grad_norm": 2.3919305019885573, "learning_rate": 5.826181628806893e-06, "loss": 0.7759, "step": 4233 }, { "epoch": 0.65, "grad_norm": 2.795221305372727, "learning_rate": 5.821677071236881e-06, "loss": 0.8936, "step": 4234 }, { "epoch": 0.65, "grad_norm": 2.543397606862117, "learning_rate": 5.817173540581459e-06, "loss": 0.8577, "step": 4235 }, { "epoch": 0.65, "grad_norm": 2.5323267733398285, "learning_rate": 5.812671037947468e-06, "loss": 0.7667, "step": 4236 }, { "epoch": 0.65, "grad_norm": 2.6574236218151848, "learning_rate": 5.8081695644415e-06, "loss": 0.8143, "step": 4237 }, { "epoch": 0.65, "grad_norm": 2.912775437447401, "learning_rate": 5.803669121169883e-06, "loss": 0.8918, "step": 4238 }, { "epoch": 0.65, "grad_norm": 2.939122520427922, "learning_rate": 5.79916970923871e-06, "loss": 0.858, "step": 4239 }, { "epoch": 0.65, "grad_norm": 2.4747736151679516, "learning_rate": 5.7946713297538045e-06, "loss": 0.8165, "step": 4240 }, { "epoch": 0.65, "grad_norm": 3.640788425357784, "learning_rate": 5.790173983820741e-06, "loss": 0.8805, "step": 4241 }, { "epoch": 0.65, "grad_norm": 2.806348940640807, "learning_rate": 5.785677672544847e-06, "loss": 0.844, "step": 4242 }, { "epoch": 0.65, "grad_norm": 2.5707488108651746, "learning_rate": 5.781182397031182e-06, "loss": 0.8061, "step": 4243 }, { "epoch": 0.65, "grad_norm": 2.638873894062458, "learning_rate": 5.776688158384565e-06, "loss": 0.9218, "step": 4244 }, { "epoch": 0.65, "grad_norm": 2.8972966930258943, "learning_rate": 5.772194957709553e-06, "loss": 0.7464, "step": 4245 }, { "epoch": 0.65, "grad_norm": 2.686284381777874, "learning_rate": 5.767702796110448e-06, "loss": 0.8202, "step": 4246 }, { "epoch": 0.65, "grad_norm": 2.605172358461725, "learning_rate": 5.763211674691296e-06, "loss": 0.8269, "step": 4247 }, { "epoch": 0.65, "grad_norm": 2.539411710086652, "learning_rate": 5.758721594555887e-06, "loss": 0.8579, "step": 4248 }, { "epoch": 0.65, "grad_norm": 2.4711589723884613, "learning_rate": 5.7542325568077576e-06, "loss": 0.7988, "step": 4249 }, { "epoch": 0.65, "grad_norm": 2.605839636785204, "learning_rate": 5.749744562550191e-06, "loss": 0.8018, "step": 4250 }, { "epoch": 0.65, "grad_norm": 2.68865790641465, "learning_rate": 5.745257612886209e-06, "loss": 0.8699, "step": 4251 }, { "epoch": 0.65, "grad_norm": 2.4119373086322318, "learning_rate": 5.740771708918573e-06, "loss": 0.7347, "step": 4252 }, { "epoch": 0.65, "grad_norm": 2.5149040499716757, "learning_rate": 5.7362868517498e-06, "loss": 0.8381, "step": 4253 }, { "epoch": 0.65, "grad_norm": 3.575269057160359, "learning_rate": 5.731803042482135e-06, "loss": 0.8648, "step": 4254 }, { "epoch": 0.65, "grad_norm": 2.492363871218477, "learning_rate": 5.72732028221758e-06, "loss": 0.7981, "step": 4255 }, { "epoch": 0.65, "grad_norm": 2.6274570081759014, "learning_rate": 5.722838572057867e-06, "loss": 0.6948, "step": 4256 }, { "epoch": 0.65, "grad_norm": 2.722018640380298, "learning_rate": 5.718357913104477e-06, "loss": 0.7619, "step": 4257 }, { "epoch": 0.65, "grad_norm": 2.6070857482325307, "learning_rate": 5.713878306458626e-06, "loss": 0.8415, "step": 4258 }, { "epoch": 0.65, "grad_norm": 2.731178106222968, "learning_rate": 5.709399753221282e-06, "loss": 0.8122, "step": 4259 }, { "epoch": 0.65, "grad_norm": 2.5202536126008734, "learning_rate": 5.704922254493139e-06, "loss": 0.9007, "step": 4260 }, { "epoch": 0.65, "grad_norm": 2.763827526804397, "learning_rate": 5.7004458113746485e-06, "loss": 0.7899, "step": 4261 }, { "epoch": 0.65, "grad_norm": 2.9383225693445243, "learning_rate": 5.695970424965993e-06, "loss": 0.7951, "step": 4262 }, { "epoch": 0.65, "grad_norm": 2.673618096203085, "learning_rate": 5.691496096367093e-06, "loss": 0.7874, "step": 4263 }, { "epoch": 0.65, "grad_norm": 2.4528289976047604, "learning_rate": 5.687022826677619e-06, "loss": 0.7333, "step": 4264 }, { "epoch": 0.65, "grad_norm": 2.434025160199743, "learning_rate": 5.682550616996968e-06, "loss": 0.7751, "step": 4265 }, { "epoch": 0.65, "grad_norm": 2.5805781831193646, "learning_rate": 5.678079468424293e-06, "loss": 0.8292, "step": 4266 }, { "epoch": 0.65, "grad_norm": 2.535643452437688, "learning_rate": 5.67360938205847e-06, "loss": 0.7846, "step": 4267 }, { "epoch": 0.65, "grad_norm": 2.7760047936406242, "learning_rate": 5.66914035899812e-06, "loss": 0.7979, "step": 4268 }, { "epoch": 0.65, "grad_norm": 2.3859097315091726, "learning_rate": 5.664672400341614e-06, "loss": 0.7801, "step": 4269 }, { "epoch": 0.65, "grad_norm": 2.8159117598444467, "learning_rate": 5.6602055071870395e-06, "loss": 0.7675, "step": 4270 }, { "epoch": 0.65, "grad_norm": 2.617294328890024, "learning_rate": 5.655739680632233e-06, "loss": 0.7799, "step": 4271 }, { "epoch": 0.65, "grad_norm": 2.5763635450190776, "learning_rate": 5.651274921774777e-06, "loss": 0.8318, "step": 4272 }, { "epoch": 0.65, "grad_norm": 2.7449717860806904, "learning_rate": 5.646811231711982e-06, "loss": 0.7591, "step": 4273 }, { "epoch": 0.65, "grad_norm": 2.627656108484001, "learning_rate": 5.642348611540892e-06, "loss": 0.7857, "step": 4274 }, { "epoch": 0.65, "grad_norm": 2.545063061864533, "learning_rate": 5.637887062358302e-06, "loss": 0.8196, "step": 4275 }, { "epoch": 0.65, "grad_norm": 2.482595594212583, "learning_rate": 5.633426585260728e-06, "loss": 0.8116, "step": 4276 }, { "epoch": 0.65, "grad_norm": 3.047445613173143, "learning_rate": 5.6289671813444376e-06, "loss": 0.8021, "step": 4277 }, { "epoch": 0.65, "grad_norm": 2.422904459979712, "learning_rate": 5.624508851705426e-06, "loss": 0.8128, "step": 4278 }, { "epoch": 0.65, "grad_norm": 2.8670743746634257, "learning_rate": 5.620051597439417e-06, "loss": 0.836, "step": 4279 }, { "epoch": 0.66, "grad_norm": 2.473690554907657, "learning_rate": 5.6155954196418905e-06, "loss": 0.772, "step": 4280 }, { "epoch": 0.66, "grad_norm": 2.6703573989353697, "learning_rate": 5.6111403194080435e-06, "loss": 0.8059, "step": 4281 }, { "epoch": 0.66, "grad_norm": 2.63961123884444, "learning_rate": 5.606686297832817e-06, "loss": 0.7687, "step": 4282 }, { "epoch": 0.66, "grad_norm": 2.7593795892751896, "learning_rate": 5.602233356010883e-06, "loss": 0.8548, "step": 4283 }, { "epoch": 0.66, "grad_norm": 2.516821655894118, "learning_rate": 5.59778149503665e-06, "loss": 0.823, "step": 4284 }, { "epoch": 0.66, "grad_norm": 3.034175184053461, "learning_rate": 5.5933307160042575e-06, "loss": 0.8375, "step": 4285 }, { "epoch": 0.66, "grad_norm": 2.609464382963494, "learning_rate": 5.588881020007588e-06, "loss": 0.7616, "step": 4286 }, { "epoch": 0.66, "grad_norm": 2.5855150647940217, "learning_rate": 5.584432408140246e-06, "loss": 0.7481, "step": 4287 }, { "epoch": 0.66, "grad_norm": 2.7773608715093907, "learning_rate": 5.579984881495582e-06, "loss": 0.7988, "step": 4288 }, { "epoch": 0.66, "grad_norm": 2.7223896843575988, "learning_rate": 5.575538441166671e-06, "loss": 0.7573, "step": 4289 }, { "epoch": 0.66, "grad_norm": 2.690483798746339, "learning_rate": 5.5710930882463174e-06, "loss": 0.8885, "step": 4290 }, { "epoch": 0.66, "grad_norm": 2.422432484428218, "learning_rate": 5.566648823827075e-06, "loss": 0.7379, "step": 4291 }, { "epoch": 0.66, "grad_norm": 2.7445270021125245, "learning_rate": 5.562205649001213e-06, "loss": 0.7741, "step": 4292 }, { "epoch": 0.66, "grad_norm": 2.699609159678072, "learning_rate": 5.5577635648607396e-06, "loss": 0.7712, "step": 4293 }, { "epoch": 0.66, "grad_norm": 3.683136191034804, "learning_rate": 5.553322572497395e-06, "loss": 0.8999, "step": 4294 }, { "epoch": 0.66, "grad_norm": 2.63819629445708, "learning_rate": 5.548882673002651e-06, "loss": 0.8582, "step": 4295 }, { "epoch": 0.66, "grad_norm": 3.4904520900083083, "learning_rate": 5.544443867467705e-06, "loss": 0.9049, "step": 4296 }, { "epoch": 0.66, "grad_norm": 2.677841058226375, "learning_rate": 5.5400061569834995e-06, "loss": 0.7495, "step": 4297 }, { "epoch": 0.66, "grad_norm": 2.6850812833894007, "learning_rate": 5.5355695426406905e-06, "loss": 0.7755, "step": 4298 }, { "epoch": 0.66, "grad_norm": 2.718695969365637, "learning_rate": 5.531134025529684e-06, "loss": 0.9263, "step": 4299 }, { "epoch": 0.66, "grad_norm": 2.6832942199376215, "learning_rate": 5.5266996067405995e-06, "loss": 0.7404, "step": 4300 }, { "epoch": 0.66, "grad_norm": 2.5524843916004953, "learning_rate": 5.522266287363289e-06, "loss": 0.7853, "step": 4301 }, { "epoch": 0.66, "grad_norm": 2.5065546394197145, "learning_rate": 5.517834068487347e-06, "loss": 0.8372, "step": 4302 }, { "epoch": 0.66, "grad_norm": 2.544350067419898, "learning_rate": 5.513402951202082e-06, "loss": 0.7003, "step": 4303 }, { "epoch": 0.66, "grad_norm": 2.7065939446365435, "learning_rate": 5.508972936596542e-06, "loss": 0.6783, "step": 4304 }, { "epoch": 0.66, "grad_norm": 2.5383869242448776, "learning_rate": 5.504544025759498e-06, "loss": 0.7375, "step": 4305 }, { "epoch": 0.66, "grad_norm": 2.591610208015511, "learning_rate": 5.500116219779453e-06, "loss": 0.8246, "step": 4306 }, { "epoch": 0.66, "grad_norm": 2.3648144456733933, "learning_rate": 5.495689519744634e-06, "loss": 0.752, "step": 4307 }, { "epoch": 0.66, "grad_norm": 3.275339281801009, "learning_rate": 5.491263926743005e-06, "loss": 0.7565, "step": 4308 }, { "epoch": 0.66, "grad_norm": 2.6419310382852195, "learning_rate": 5.486839441862248e-06, "loss": 0.7358, "step": 4309 }, { "epoch": 0.66, "grad_norm": 2.5891680783499385, "learning_rate": 5.482416066189783e-06, "loss": 0.7734, "step": 4310 }, { "epoch": 0.66, "grad_norm": 2.8216200785004437, "learning_rate": 5.477993800812749e-06, "loss": 0.8772, "step": 4311 }, { "epoch": 0.66, "grad_norm": 3.640532241676102, "learning_rate": 5.473572646818011e-06, "loss": 0.8411, "step": 4312 }, { "epoch": 0.66, "grad_norm": 2.427155653140146, "learning_rate": 5.4691526052921705e-06, "loss": 0.7412, "step": 4313 }, { "epoch": 0.66, "grad_norm": 2.4904325112756585, "learning_rate": 5.464733677321548e-06, "loss": 0.7864, "step": 4314 }, { "epoch": 0.66, "grad_norm": 2.8079062192354263, "learning_rate": 5.460315863992191e-06, "loss": 0.7866, "step": 4315 }, { "epoch": 0.66, "grad_norm": 2.853060485157894, "learning_rate": 5.455899166389875e-06, "loss": 0.8018, "step": 4316 }, { "epoch": 0.66, "grad_norm": 2.485773737267365, "learning_rate": 5.451483585600096e-06, "loss": 0.7539, "step": 4317 }, { "epoch": 0.66, "grad_norm": 2.797910829886508, "learning_rate": 5.447069122708086e-06, "loss": 0.7892, "step": 4318 }, { "epoch": 0.66, "grad_norm": 2.8726475904531275, "learning_rate": 5.442655778798795e-06, "loss": 0.7287, "step": 4319 }, { "epoch": 0.66, "grad_norm": 2.964910825888747, "learning_rate": 5.438243554956895e-06, "loss": 0.9582, "step": 4320 }, { "epoch": 0.66, "grad_norm": 2.6561943823597725, "learning_rate": 5.433832452266794e-06, "loss": 0.8632, "step": 4321 }, { "epoch": 0.66, "grad_norm": 2.544080014332781, "learning_rate": 5.429422471812612e-06, "loss": 0.8035, "step": 4322 }, { "epoch": 0.66, "grad_norm": 2.826157745342801, "learning_rate": 5.425013614678197e-06, "loss": 0.8831, "step": 4323 }, { "epoch": 0.66, "grad_norm": 3.4112657605218684, "learning_rate": 5.4206058819471276e-06, "loss": 0.797, "step": 4324 }, { "epoch": 0.66, "grad_norm": 2.4430557001274273, "learning_rate": 5.416199274702699e-06, "loss": 0.7924, "step": 4325 }, { "epoch": 0.66, "grad_norm": 2.4328516564445186, "learning_rate": 5.411793794027931e-06, "loss": 0.727, "step": 4326 }, { "epoch": 0.66, "grad_norm": 2.8558225094747867, "learning_rate": 5.407389441005569e-06, "loss": 0.7773, "step": 4327 }, { "epoch": 0.66, "grad_norm": 2.5283966333337706, "learning_rate": 5.402986216718071e-06, "loss": 0.77, "step": 4328 }, { "epoch": 0.66, "grad_norm": 2.728670654557776, "learning_rate": 5.398584122247639e-06, "loss": 0.8477, "step": 4329 }, { "epoch": 0.66, "grad_norm": 2.8334669043005265, "learning_rate": 5.394183158676178e-06, "loss": 0.8122, "step": 4330 }, { "epoch": 0.66, "grad_norm": 2.652568907162666, "learning_rate": 5.389783327085317e-06, "loss": 0.822, "step": 4331 }, { "epoch": 0.66, "grad_norm": 2.3142620945835386, "learning_rate": 5.38538462855642e-06, "loss": 0.6978, "step": 4332 }, { "epoch": 0.66, "grad_norm": 2.7730169314845914, "learning_rate": 5.38098706417056e-06, "loss": 0.807, "step": 4333 }, { "epoch": 0.66, "grad_norm": 2.851402696586342, "learning_rate": 5.376590635008531e-06, "loss": 0.8454, "step": 4334 }, { "epoch": 0.66, "grad_norm": 2.6112912494257206, "learning_rate": 5.3721953421508585e-06, "loss": 0.7354, "step": 4335 }, { "epoch": 0.66, "grad_norm": 2.68768207215782, "learning_rate": 5.367801186677779e-06, "loss": 0.8525, "step": 4336 }, { "epoch": 0.66, "grad_norm": 2.348257730715597, "learning_rate": 5.363408169669253e-06, "loss": 0.7565, "step": 4337 }, { "epoch": 0.66, "grad_norm": 2.5730011308658454, "learning_rate": 5.359016292204962e-06, "loss": 0.7148, "step": 4338 }, { "epoch": 0.66, "grad_norm": 2.820490788623341, "learning_rate": 5.354625555364301e-06, "loss": 0.7555, "step": 4339 }, { "epoch": 0.66, "grad_norm": 2.407342488224494, "learning_rate": 5.3502359602263975e-06, "loss": 0.7254, "step": 4340 }, { "epoch": 0.66, "grad_norm": 2.756876501448233, "learning_rate": 5.345847507870087e-06, "loss": 0.7881, "step": 4341 }, { "epoch": 0.66, "grad_norm": 2.394704047096753, "learning_rate": 5.341460199373925e-06, "loss": 0.7396, "step": 4342 }, { "epoch": 0.66, "grad_norm": 9.638217727897471, "learning_rate": 5.337074035816197e-06, "loss": 0.9129, "step": 4343 }, { "epoch": 0.66, "grad_norm": 2.5261876067230253, "learning_rate": 5.332689018274892e-06, "loss": 0.7824, "step": 4344 }, { "epoch": 0.67, "grad_norm": 2.95205594094719, "learning_rate": 5.3283051478277244e-06, "loss": 0.8592, "step": 4345 }, { "epoch": 0.67, "grad_norm": 2.646327886755659, "learning_rate": 5.323922425552133e-06, "loss": 0.7524, "step": 4346 }, { "epoch": 0.67, "grad_norm": 2.706809301435508, "learning_rate": 5.319540852525264e-06, "loss": 0.8686, "step": 4347 }, { "epoch": 0.67, "grad_norm": 2.881161427865816, "learning_rate": 5.315160429823984e-06, "loss": 0.8364, "step": 4348 }, { "epoch": 0.67, "grad_norm": 2.6895405962270083, "learning_rate": 5.3107811585248806e-06, "loss": 0.7875, "step": 4349 }, { "epoch": 0.67, "grad_norm": 2.628565569216531, "learning_rate": 5.306403039704249e-06, "loss": 0.7816, "step": 4350 }, { "epoch": 0.67, "grad_norm": 2.6822366932950317, "learning_rate": 5.302026074438118e-06, "loss": 0.7017, "step": 4351 }, { "epoch": 0.67, "grad_norm": 2.39036593803038, "learning_rate": 5.297650263802218e-06, "loss": 0.7353, "step": 4352 }, { "epoch": 0.67, "grad_norm": 2.8448153356191828, "learning_rate": 5.293275608871997e-06, "loss": 0.8072, "step": 4353 }, { "epoch": 0.67, "grad_norm": 2.664536900824033, "learning_rate": 5.28890211072263e-06, "loss": 0.8026, "step": 4354 }, { "epoch": 0.67, "grad_norm": 2.513815124466424, "learning_rate": 5.284529770428997e-06, "loss": 0.7549, "step": 4355 }, { "epoch": 0.67, "grad_norm": 2.6869729516841416, "learning_rate": 5.280158589065691e-06, "loss": 0.8434, "step": 4356 }, { "epoch": 0.67, "grad_norm": 3.658999602038804, "learning_rate": 5.275788567707036e-06, "loss": 0.9011, "step": 4357 }, { "epoch": 0.67, "grad_norm": 2.625186278881105, "learning_rate": 5.271419707427056e-06, "loss": 0.8522, "step": 4358 }, { "epoch": 0.67, "grad_norm": 2.8677496677154295, "learning_rate": 5.267052009299494e-06, "loss": 0.827, "step": 4359 }, { "epoch": 0.67, "grad_norm": 3.046371336945512, "learning_rate": 5.2626854743978065e-06, "loss": 0.7203, "step": 4360 }, { "epoch": 0.67, "grad_norm": 2.631751934773863, "learning_rate": 5.258320103795162e-06, "loss": 0.8268, "step": 4361 }, { "epoch": 0.67, "grad_norm": 2.581228394407631, "learning_rate": 5.253955898564456e-06, "loss": 0.8345, "step": 4362 }, { "epoch": 0.67, "grad_norm": 2.8536806404973762, "learning_rate": 5.249592859778281e-06, "loss": 0.9339, "step": 4363 }, { "epoch": 0.67, "grad_norm": 3.381748068008239, "learning_rate": 5.245230988508947e-06, "loss": 0.8614, "step": 4364 }, { "epoch": 0.67, "grad_norm": 2.6552696050449454, "learning_rate": 5.240870285828489e-06, "loss": 0.7775, "step": 4365 }, { "epoch": 0.67, "grad_norm": 2.5855320318893997, "learning_rate": 5.236510752808639e-06, "loss": 0.6907, "step": 4366 }, { "epoch": 0.67, "grad_norm": 2.844571555995099, "learning_rate": 5.232152390520845e-06, "loss": 0.8022, "step": 4367 }, { "epoch": 0.67, "grad_norm": 2.6640972709004926, "learning_rate": 5.227795200036279e-06, "loss": 0.7807, "step": 4368 }, { "epoch": 0.67, "grad_norm": 2.5939197694825897, "learning_rate": 5.223439182425809e-06, "loss": 0.7405, "step": 4369 }, { "epoch": 0.67, "grad_norm": 2.4464154437815178, "learning_rate": 5.219084338760025e-06, "loss": 0.8124, "step": 4370 }, { "epoch": 0.67, "grad_norm": 2.6917914716514706, "learning_rate": 5.214730670109227e-06, "loss": 0.8367, "step": 4371 }, { "epoch": 0.67, "grad_norm": 2.3822181150948696, "learning_rate": 5.210378177543416e-06, "loss": 0.7319, "step": 4372 }, { "epoch": 0.67, "grad_norm": 2.6997257949647064, "learning_rate": 5.206026862132324e-06, "loss": 0.7314, "step": 4373 }, { "epoch": 0.67, "grad_norm": 2.603477960959868, "learning_rate": 5.201676724945377e-06, "loss": 0.8084, "step": 4374 }, { "epoch": 0.67, "grad_norm": 2.6164521221019186, "learning_rate": 5.197327767051713e-06, "loss": 0.7139, "step": 4375 }, { "epoch": 0.67, "grad_norm": 2.661036351334042, "learning_rate": 5.192979989520193e-06, "loss": 0.7833, "step": 4376 }, { "epoch": 0.67, "grad_norm": 2.609577223160548, "learning_rate": 5.188633393419371e-06, "loss": 0.8232, "step": 4377 }, { "epoch": 0.67, "grad_norm": 2.481068145446453, "learning_rate": 5.184287979817519e-06, "loss": 0.7904, "step": 4378 }, { "epoch": 0.67, "grad_norm": 2.989034486269615, "learning_rate": 5.179943749782623e-06, "loss": 0.803, "step": 4379 }, { "epoch": 0.67, "grad_norm": 2.7008982367335648, "learning_rate": 5.175600704382371e-06, "loss": 0.804, "step": 4380 }, { "epoch": 0.67, "grad_norm": 2.5881479536178684, "learning_rate": 5.171258844684155e-06, "loss": 0.8173, "step": 4381 }, { "epoch": 0.67, "grad_norm": 2.813293310899786, "learning_rate": 5.166918171755097e-06, "loss": 0.7653, "step": 4382 }, { "epoch": 0.67, "grad_norm": 2.844039680058116, "learning_rate": 5.1625786866619955e-06, "loss": 0.8475, "step": 4383 }, { "epoch": 0.67, "grad_norm": 3.2181719756764, "learning_rate": 5.158240390471385e-06, "loss": 0.7642, "step": 4384 }, { "epoch": 0.67, "grad_norm": 2.606973473588288, "learning_rate": 5.153903284249495e-06, "loss": 0.712, "step": 4385 }, { "epoch": 0.67, "grad_norm": 3.2000365788149314, "learning_rate": 5.149567369062261e-06, "loss": 0.686, "step": 4386 }, { "epoch": 0.67, "grad_norm": 3.9467566513323096, "learning_rate": 5.145232645975336e-06, "loss": 0.8953, "step": 4387 }, { "epoch": 0.67, "grad_norm": 2.7557886814227865, "learning_rate": 5.140899116054068e-06, "loss": 0.8869, "step": 4388 }, { "epoch": 0.67, "grad_norm": 2.674056263695096, "learning_rate": 5.136566780363515e-06, "loss": 0.7777, "step": 4389 }, { "epoch": 0.67, "grad_norm": 2.617838450703711, "learning_rate": 5.1322356399684525e-06, "loss": 0.8102, "step": 4390 }, { "epoch": 0.67, "grad_norm": 2.8643287708523495, "learning_rate": 5.127905695933343e-06, "loss": 0.7876, "step": 4391 }, { "epoch": 0.67, "grad_norm": 3.567823294869154, "learning_rate": 5.123576949322375e-06, "loss": 0.9056, "step": 4392 }, { "epoch": 0.67, "grad_norm": 2.687122412429552, "learning_rate": 5.119249401199428e-06, "loss": 0.7666, "step": 4393 }, { "epoch": 0.67, "grad_norm": 2.5083630162065336, "learning_rate": 5.114923052628092e-06, "loss": 0.7502, "step": 4394 }, { "epoch": 0.67, "grad_norm": 2.464663395166503, "learning_rate": 5.110597904671664e-06, "loss": 0.745, "step": 4395 }, { "epoch": 0.67, "grad_norm": 2.8353050509079107, "learning_rate": 5.106273958393142e-06, "loss": 0.7786, "step": 4396 }, { "epoch": 0.67, "grad_norm": 2.872102247273557, "learning_rate": 5.101951214855226e-06, "loss": 0.8282, "step": 4397 }, { "epoch": 0.67, "grad_norm": 2.730776359515011, "learning_rate": 5.097629675120336e-06, "loss": 0.8991, "step": 4398 }, { "epoch": 0.67, "grad_norm": 2.7406506612238153, "learning_rate": 5.093309340250578e-06, "loss": 0.8758, "step": 4399 }, { "epoch": 0.67, "grad_norm": 2.773308560374021, "learning_rate": 5.0889902113077695e-06, "loss": 0.8074, "step": 4400 }, { "epoch": 0.67, "grad_norm": 2.845091034005279, "learning_rate": 5.084672289353435e-06, "loss": 0.8554, "step": 4401 }, { "epoch": 0.67, "grad_norm": 2.7187441930311462, "learning_rate": 5.080355575448792e-06, "loss": 0.7727, "step": 4402 }, { "epoch": 0.67, "grad_norm": 2.5786635844494543, "learning_rate": 5.076040070654778e-06, "loss": 0.8989, "step": 4403 }, { "epoch": 0.67, "grad_norm": 3.5742272068838417, "learning_rate": 5.071725776032015e-06, "loss": 0.7542, "step": 4404 }, { "epoch": 0.67, "grad_norm": 2.389808986839853, "learning_rate": 5.067412692640839e-06, "loss": 0.7614, "step": 4405 }, { "epoch": 0.67, "grad_norm": 2.754195864365841, "learning_rate": 5.063100821541281e-06, "loss": 0.8037, "step": 4406 }, { "epoch": 0.67, "grad_norm": 2.867365347124313, "learning_rate": 5.058790163793083e-06, "loss": 0.744, "step": 4407 }, { "epoch": 0.67, "grad_norm": 2.629332336481846, "learning_rate": 5.054480720455677e-06, "loss": 0.7491, "step": 4408 }, { "epoch": 0.67, "grad_norm": 2.603839244071808, "learning_rate": 5.05017249258821e-06, "loss": 0.9164, "step": 4409 }, { "epoch": 0.68, "grad_norm": 2.702141953179618, "learning_rate": 5.045865481249523e-06, "loss": 0.784, "step": 4410 }, { "epoch": 0.68, "grad_norm": 3.5178397816644646, "learning_rate": 5.041559687498152e-06, "loss": 0.8736, "step": 4411 }, { "epoch": 0.68, "grad_norm": 2.5196806497253896, "learning_rate": 5.03725511239235e-06, "loss": 0.8599, "step": 4412 }, { "epoch": 0.68, "grad_norm": 3.4533128431782645, "learning_rate": 5.03295175699005e-06, "loss": 0.8331, "step": 4413 }, { "epoch": 0.68, "grad_norm": 2.5491905479689083, "learning_rate": 5.0286496223489075e-06, "loss": 0.8176, "step": 4414 }, { "epoch": 0.68, "grad_norm": 2.667131326142279, "learning_rate": 5.0243487095262615e-06, "loss": 0.8368, "step": 4415 }, { "epoch": 0.68, "grad_norm": 2.507835981837654, "learning_rate": 5.020049019579154e-06, "loss": 0.8017, "step": 4416 }, { "epoch": 0.68, "grad_norm": 2.7554033825056803, "learning_rate": 5.015750553564331e-06, "loss": 0.8758, "step": 4417 }, { "epoch": 0.68, "grad_norm": 2.5901696392138494, "learning_rate": 5.011453312538233e-06, "loss": 0.7932, "step": 4418 }, { "epoch": 0.68, "grad_norm": 2.6740393141937977, "learning_rate": 5.007157297556997e-06, "loss": 0.725, "step": 4419 }, { "epoch": 0.68, "grad_norm": 2.805224200929592, "learning_rate": 5.002862509676471e-06, "loss": 0.8622, "step": 4420 }, { "epoch": 0.68, "grad_norm": 2.675242937946728, "learning_rate": 4.998568949952192e-06, "loss": 0.8288, "step": 4421 }, { "epoch": 0.68, "grad_norm": 3.6625074354824894, "learning_rate": 4.99427661943939e-06, "loss": 0.8326, "step": 4422 }, { "epoch": 0.68, "grad_norm": 2.4338348325708035, "learning_rate": 4.989985519193008e-06, "loss": 0.7297, "step": 4423 }, { "epoch": 0.68, "grad_norm": 2.762201107569858, "learning_rate": 4.98569565026767e-06, "loss": 0.8451, "step": 4424 }, { "epoch": 0.68, "grad_norm": 2.6072736360156683, "learning_rate": 4.981407013717714e-06, "loss": 0.7758, "step": 4425 }, { "epoch": 0.68, "grad_norm": 2.758225364934191, "learning_rate": 4.977119610597163e-06, "loss": 0.8361, "step": 4426 }, { "epoch": 0.68, "grad_norm": 2.542021886033243, "learning_rate": 4.972833441959739e-06, "loss": 0.7494, "step": 4427 }, { "epoch": 0.68, "grad_norm": 2.8372559940748414, "learning_rate": 4.968548508858863e-06, "loss": 0.7811, "step": 4428 }, { "epoch": 0.68, "grad_norm": 2.851293790673043, "learning_rate": 4.964264812347651e-06, "loss": 0.6002, "step": 4429 }, { "epoch": 0.68, "grad_norm": 2.5694384446828313, "learning_rate": 4.959982353478911e-06, "loss": 0.7817, "step": 4430 }, { "epoch": 0.68, "grad_norm": 2.8666717398217996, "learning_rate": 4.955701133305162e-06, "loss": 0.8643, "step": 4431 }, { "epoch": 0.68, "grad_norm": 2.521520650821122, "learning_rate": 4.9514211528786e-06, "loss": 0.8003, "step": 4432 }, { "epoch": 0.68, "grad_norm": 2.361515154255389, "learning_rate": 4.9471424132511224e-06, "loss": 0.7376, "step": 4433 }, { "epoch": 0.68, "grad_norm": 2.510640568551166, "learning_rate": 4.942864915474331e-06, "loss": 0.827, "step": 4434 }, { "epoch": 0.68, "grad_norm": 2.53168907265518, "learning_rate": 4.9385886605995075e-06, "loss": 0.7962, "step": 4435 }, { "epoch": 0.68, "grad_norm": 2.392625479649822, "learning_rate": 4.934313649677641e-06, "loss": 0.6972, "step": 4436 }, { "epoch": 0.68, "grad_norm": 2.537304145927643, "learning_rate": 4.9300398837594076e-06, "loss": 0.771, "step": 4437 }, { "epoch": 0.68, "grad_norm": 2.610679280539628, "learning_rate": 4.925767363895179e-06, "loss": 0.7721, "step": 4438 }, { "epoch": 0.68, "grad_norm": 2.9022231835808507, "learning_rate": 4.9214960911350185e-06, "loss": 0.753, "step": 4439 }, { "epoch": 0.68, "grad_norm": 2.7496681277602684, "learning_rate": 4.917226066528689e-06, "loss": 0.7837, "step": 4440 }, { "epoch": 0.68, "grad_norm": 2.6893999122809604, "learning_rate": 4.912957291125635e-06, "loss": 0.7454, "step": 4441 }, { "epoch": 0.68, "grad_norm": 2.740717636172206, "learning_rate": 4.908689765975012e-06, "loss": 0.807, "step": 4442 }, { "epoch": 0.68, "grad_norm": 2.4983590928601926, "learning_rate": 4.904423492125653e-06, "loss": 0.7373, "step": 4443 }, { "epoch": 0.68, "grad_norm": 2.6187129268123734, "learning_rate": 4.900158470626085e-06, "loss": 0.7673, "step": 4444 }, { "epoch": 0.68, "grad_norm": 2.6988355035420315, "learning_rate": 4.895894702524538e-06, "loss": 0.7332, "step": 4445 }, { "epoch": 0.68, "grad_norm": 2.802231068626159, "learning_rate": 4.891632188868921e-06, "loss": 0.7753, "step": 4446 }, { "epoch": 0.68, "grad_norm": 2.4975003336574, "learning_rate": 4.887370930706845e-06, "loss": 0.7797, "step": 4447 }, { "epoch": 0.68, "grad_norm": 2.9680313023184275, "learning_rate": 4.8831109290856046e-06, "loss": 0.8899, "step": 4448 }, { "epoch": 0.68, "grad_norm": 2.727326968393698, "learning_rate": 4.8788521850521904e-06, "loss": 0.826, "step": 4449 }, { "epoch": 0.68, "grad_norm": 2.7302688744437136, "learning_rate": 4.874594699653281e-06, "loss": 0.8682, "step": 4450 }, { "epoch": 0.68, "grad_norm": 3.492593665785906, "learning_rate": 4.870338473935246e-06, "loss": 0.7884, "step": 4451 }, { "epoch": 0.68, "grad_norm": 2.5666077831783607, "learning_rate": 4.866083508944145e-06, "loss": 0.7734, "step": 4452 }, { "epoch": 0.68, "grad_norm": 2.9646458483096367, "learning_rate": 4.8618298057257355e-06, "loss": 0.6841, "step": 4453 }, { "epoch": 0.68, "grad_norm": 2.566984184996757, "learning_rate": 4.857577365325452e-06, "loss": 0.7161, "step": 4454 }, { "epoch": 0.68, "grad_norm": 3.576733376569184, "learning_rate": 4.853326188788425e-06, "loss": 0.8825, "step": 4455 }, { "epoch": 0.68, "grad_norm": 2.8115505630557847, "learning_rate": 4.849076277159481e-06, "loss": 0.9294, "step": 4456 }, { "epoch": 0.68, "grad_norm": 2.944850469212431, "learning_rate": 4.844827631483121e-06, "loss": 0.7999, "step": 4457 }, { "epoch": 0.68, "grad_norm": 2.708885597821722, "learning_rate": 4.840580252803552e-06, "loss": 0.7353, "step": 4458 }, { "epoch": 0.68, "grad_norm": 2.944819532038206, "learning_rate": 4.836334142164654e-06, "loss": 0.8249, "step": 4459 }, { "epoch": 0.68, "grad_norm": 2.739948538835541, "learning_rate": 4.832089300610003e-06, "loss": 0.8552, "step": 4460 }, { "epoch": 0.68, "grad_norm": 2.7541977968082025, "learning_rate": 4.8278457291828625e-06, "loss": 0.7478, "step": 4461 }, { "epoch": 0.68, "grad_norm": 2.765934044481929, "learning_rate": 4.823603428926185e-06, "loss": 0.8427, "step": 4462 }, { "epoch": 0.68, "grad_norm": 2.62684539168531, "learning_rate": 4.819362400882602e-06, "loss": 0.7443, "step": 4463 }, { "epoch": 0.68, "grad_norm": 2.666412253891732, "learning_rate": 4.815122646094448e-06, "loss": 0.8608, "step": 4464 }, { "epoch": 0.68, "grad_norm": 2.58003459090976, "learning_rate": 4.8108841656037295e-06, "loss": 0.829, "step": 4465 }, { "epoch": 0.68, "grad_norm": 2.540279478810456, "learning_rate": 4.806646960452151e-06, "loss": 0.7526, "step": 4466 }, { "epoch": 0.68, "grad_norm": 2.5625038092096126, "learning_rate": 4.802411031681099e-06, "loss": 0.8165, "step": 4467 }, { "epoch": 0.68, "grad_norm": 2.9700586139058855, "learning_rate": 4.798176380331638e-06, "loss": 0.7515, "step": 4468 }, { "epoch": 0.68, "grad_norm": 2.8773827775527314, "learning_rate": 4.793943007444536e-06, "loss": 0.6777, "step": 4469 }, { "epoch": 0.68, "grad_norm": 2.6582906590904454, "learning_rate": 4.789710914060234e-06, "loss": 0.8083, "step": 4470 }, { "epoch": 0.68, "grad_norm": 2.528804350071189, "learning_rate": 4.785480101218861e-06, "loss": 0.7438, "step": 4471 }, { "epoch": 0.68, "grad_norm": 2.542769406287971, "learning_rate": 4.781250569960233e-06, "loss": 0.8081, "step": 4472 }, { "epoch": 0.68, "grad_norm": 2.693399220911003, "learning_rate": 4.777022321323849e-06, "loss": 0.9128, "step": 4473 }, { "epoch": 0.68, "grad_norm": 2.6851606809802746, "learning_rate": 4.77279535634889e-06, "loss": 0.8111, "step": 4474 }, { "epoch": 0.68, "grad_norm": 2.5435598261018204, "learning_rate": 4.768569676074235e-06, "loss": 0.764, "step": 4475 }, { "epoch": 0.69, "grad_norm": 2.584009644323771, "learning_rate": 4.764345281538428e-06, "loss": 0.7162, "step": 4476 }, { "epoch": 0.69, "grad_norm": 2.62466241921889, "learning_rate": 4.760122173779715e-06, "loss": 0.8222, "step": 4477 }, { "epoch": 0.69, "grad_norm": 2.533256258740026, "learning_rate": 4.755900353836015e-06, "loss": 0.7704, "step": 4478 }, { "epoch": 0.69, "grad_norm": 3.861712170497619, "learning_rate": 4.751679822744928e-06, "loss": 0.8504, "step": 4479 }, { "epoch": 0.69, "grad_norm": 2.7018656507970547, "learning_rate": 4.747460581543749e-06, "loss": 0.7879, "step": 4480 }, { "epoch": 0.69, "grad_norm": 2.7388923120779887, "learning_rate": 4.743242631269445e-06, "loss": 0.7889, "step": 4481 }, { "epoch": 0.69, "grad_norm": 2.855257683308518, "learning_rate": 4.739025972958673e-06, "loss": 0.8472, "step": 4482 }, { "epoch": 0.69, "grad_norm": 2.845673836028014, "learning_rate": 4.734810607647766e-06, "loss": 0.7232, "step": 4483 }, { "epoch": 0.69, "grad_norm": 2.825542516978436, "learning_rate": 4.730596536372745e-06, "loss": 0.7726, "step": 4484 }, { "epoch": 0.69, "grad_norm": 2.3857235505175716, "learning_rate": 4.726383760169304e-06, "loss": 0.7034, "step": 4485 }, { "epoch": 0.69, "grad_norm": 2.530643930906867, "learning_rate": 4.722172280072835e-06, "loss": 0.7931, "step": 4486 }, { "epoch": 0.69, "grad_norm": 2.657113264744651, "learning_rate": 4.717962097118394e-06, "loss": 0.8575, "step": 4487 }, { "epoch": 0.69, "grad_norm": 2.713785602161177, "learning_rate": 4.713753212340732e-06, "loss": 0.8137, "step": 4488 }, { "epoch": 0.69, "grad_norm": 2.5858389823910195, "learning_rate": 4.709545626774273e-06, "loss": 0.8343, "step": 4489 }, { "epoch": 0.69, "grad_norm": 2.556127744178826, "learning_rate": 4.705339341453119e-06, "loss": 0.7033, "step": 4490 }, { "epoch": 0.69, "grad_norm": 2.370308093136086, "learning_rate": 4.701134357411065e-06, "loss": 0.7872, "step": 4491 }, { "epoch": 0.69, "grad_norm": 2.740060254795805, "learning_rate": 4.696930675681571e-06, "loss": 0.7007, "step": 4492 }, { "epoch": 0.69, "grad_norm": 2.632838666024672, "learning_rate": 4.692728297297785e-06, "loss": 0.8182, "step": 4493 }, { "epoch": 0.69, "grad_norm": 2.703412372670818, "learning_rate": 4.6885272232925426e-06, "loss": 0.8383, "step": 4494 }, { "epoch": 0.69, "grad_norm": 2.549367641767443, "learning_rate": 4.68432745469834e-06, "loss": 0.8128, "step": 4495 }, { "epoch": 0.69, "grad_norm": 2.5904592992356466, "learning_rate": 4.68012899254736e-06, "loss": 0.7876, "step": 4496 }, { "epoch": 0.69, "grad_norm": 2.8401619973861996, "learning_rate": 4.675931837871477e-06, "loss": 0.7499, "step": 4497 }, { "epoch": 0.69, "grad_norm": 3.0015771959374127, "learning_rate": 4.671735991702225e-06, "loss": 0.9088, "step": 4498 }, { "epoch": 0.69, "grad_norm": 3.5500184314648617, "learning_rate": 4.667541455070834e-06, "loss": 0.7474, "step": 4499 }, { "epoch": 0.69, "grad_norm": 2.6213439037770105, "learning_rate": 4.663348229008199e-06, "loss": 0.7026, "step": 4500 }, { "epoch": 0.69, "grad_norm": 2.6287716051634447, "learning_rate": 4.659156314544893e-06, "loss": 0.8177, "step": 4501 }, { "epoch": 0.69, "grad_norm": 2.459424953526767, "learning_rate": 4.654965712711178e-06, "loss": 0.7863, "step": 4502 }, { "epoch": 0.69, "grad_norm": 2.587294325712288, "learning_rate": 4.6507764245369855e-06, "loss": 0.8057, "step": 4503 }, { "epoch": 0.69, "grad_norm": 3.396969034639195, "learning_rate": 4.646588451051919e-06, "loss": 0.7544, "step": 4504 }, { "epoch": 0.69, "grad_norm": 2.6668852980000266, "learning_rate": 4.642401793285271e-06, "loss": 0.8272, "step": 4505 }, { "epoch": 0.69, "grad_norm": 2.5722160264858815, "learning_rate": 4.6382164522660055e-06, "loss": 0.7214, "step": 4506 }, { "epoch": 0.69, "grad_norm": 2.8202500748321375, "learning_rate": 4.634032429022758e-06, "loss": 0.8385, "step": 4507 }, { "epoch": 0.69, "grad_norm": 4.265580443189982, "learning_rate": 4.629849724583846e-06, "loss": 0.9186, "step": 4508 }, { "epoch": 0.69, "grad_norm": 2.7453839692216855, "learning_rate": 4.625668339977255e-06, "loss": 0.7932, "step": 4509 }, { "epoch": 0.69, "grad_norm": 2.625301799758122, "learning_rate": 4.621488276230662e-06, "loss": 0.8853, "step": 4510 }, { "epoch": 0.69, "grad_norm": 2.4718008492512764, "learning_rate": 4.617309534371404e-06, "loss": 0.7119, "step": 4511 }, { "epoch": 0.69, "grad_norm": 2.641255755127557, "learning_rate": 4.613132115426496e-06, "loss": 0.7424, "step": 4512 }, { "epoch": 0.69, "grad_norm": 2.5223496054941363, "learning_rate": 4.608956020422638e-06, "loss": 0.7684, "step": 4513 }, { "epoch": 0.69, "grad_norm": 2.561888272311924, "learning_rate": 4.604781250386191e-06, "loss": 0.8153, "step": 4514 }, { "epoch": 0.69, "grad_norm": 2.4367407854667174, "learning_rate": 4.600607806343196e-06, "loss": 0.682, "step": 4515 }, { "epoch": 0.69, "grad_norm": 2.7086669415237523, "learning_rate": 4.596435689319374e-06, "loss": 0.8494, "step": 4516 }, { "epoch": 0.69, "grad_norm": 2.840924918015386, "learning_rate": 4.59226490034011e-06, "loss": 0.8094, "step": 4517 }, { "epoch": 0.69, "grad_norm": 2.3463652209327797, "learning_rate": 4.588095440430469e-06, "loss": 0.7552, "step": 4518 }, { "epoch": 0.69, "grad_norm": 2.608067751840558, "learning_rate": 4.583927310615185e-06, "loss": 0.8524, "step": 4519 }, { "epoch": 0.69, "grad_norm": 2.5728811216388427, "learning_rate": 4.579760511918666e-06, "loss": 0.7234, "step": 4520 }, { "epoch": 0.69, "grad_norm": 2.688885009168033, "learning_rate": 4.575595045365e-06, "loss": 0.8409, "step": 4521 }, { "epoch": 0.69, "grad_norm": 2.5940511926032985, "learning_rate": 4.5714309119779385e-06, "loss": 0.7412, "step": 4522 }, { "epoch": 0.69, "grad_norm": 2.6332325498634135, "learning_rate": 4.567268112780906e-06, "loss": 0.8335, "step": 4523 }, { "epoch": 0.69, "grad_norm": 2.7237828697539728, "learning_rate": 4.563106648797008e-06, "loss": 0.8518, "step": 4524 }, { "epoch": 0.69, "grad_norm": 2.5714001553129493, "learning_rate": 4.558946521049011e-06, "loss": 0.8423, "step": 4525 }, { "epoch": 0.69, "grad_norm": 2.6647391578755824, "learning_rate": 4.554787730559357e-06, "loss": 0.8729, "step": 4526 }, { "epoch": 0.69, "grad_norm": 4.669061939275215, "learning_rate": 4.550630278350165e-06, "loss": 0.9408, "step": 4527 }, { "epoch": 0.69, "grad_norm": 2.508578076573883, "learning_rate": 4.546474165443219e-06, "loss": 0.8723, "step": 4528 }, { "epoch": 0.69, "grad_norm": 2.687921052688467, "learning_rate": 4.542319392859972e-06, "loss": 0.7999, "step": 4529 }, { "epoch": 0.69, "grad_norm": 2.492568968275793, "learning_rate": 4.538165961621552e-06, "loss": 0.7753, "step": 4530 }, { "epoch": 0.69, "grad_norm": 2.526572412400771, "learning_rate": 4.5340138727487505e-06, "loss": 0.8778, "step": 4531 }, { "epoch": 0.69, "grad_norm": 2.872178179667303, "learning_rate": 4.529863127262045e-06, "loss": 0.7821, "step": 4532 }, { "epoch": 0.69, "grad_norm": 2.6176834783726637, "learning_rate": 4.525713726181567e-06, "loss": 0.7875, "step": 4533 }, { "epoch": 0.69, "grad_norm": 2.7386225860310667, "learning_rate": 4.521565670527119e-06, "loss": 0.896, "step": 4534 }, { "epoch": 0.69, "grad_norm": 2.5227923641482493, "learning_rate": 4.517418961318185e-06, "loss": 0.8407, "step": 4535 }, { "epoch": 0.69, "grad_norm": 2.91481708532559, "learning_rate": 4.513273599573906e-06, "loss": 0.853, "step": 4536 }, { "epoch": 0.69, "grad_norm": 2.4922437195264537, "learning_rate": 4.50912958631309e-06, "loss": 0.7683, "step": 4537 }, { "epoch": 0.69, "grad_norm": 2.526874141534352, "learning_rate": 4.504986922554229e-06, "loss": 0.7781, "step": 4538 }, { "epoch": 0.69, "grad_norm": 2.4559635537854567, "learning_rate": 4.500845609315468e-06, "loss": 0.8497, "step": 4539 }, { "epoch": 0.69, "grad_norm": 2.524651553011375, "learning_rate": 4.496705647614628e-06, "loss": 0.6886, "step": 4540 }, { "epoch": 0.7, "grad_norm": 2.8576812336465296, "learning_rate": 4.492567038469194e-06, "loss": 0.7955, "step": 4541 }, { "epoch": 0.7, "grad_norm": 2.7588167195369295, "learning_rate": 4.488429782896315e-06, "loss": 0.7579, "step": 4542 }, { "epoch": 0.7, "grad_norm": 2.5805819596962962, "learning_rate": 4.484293881912823e-06, "loss": 0.8285, "step": 4543 }, { "epoch": 0.7, "grad_norm": 2.5669736328115, "learning_rate": 4.4801593365352e-06, "loss": 0.8195, "step": 4544 }, { "epoch": 0.7, "grad_norm": 2.645156566188588, "learning_rate": 4.4760261477796e-06, "loss": 0.782, "step": 4545 }, { "epoch": 0.7, "grad_norm": 2.8599751400315565, "learning_rate": 4.47189431666185e-06, "loss": 0.7474, "step": 4546 }, { "epoch": 0.7, "grad_norm": 2.618919615293604, "learning_rate": 4.4677638441974344e-06, "loss": 0.7622, "step": 4547 }, { "epoch": 0.7, "grad_norm": 2.5745283079506494, "learning_rate": 4.463634731401506e-06, "loss": 0.7358, "step": 4548 }, { "epoch": 0.7, "grad_norm": 2.768223479539422, "learning_rate": 4.459506979288891e-06, "loss": 0.8084, "step": 4549 }, { "epoch": 0.7, "grad_norm": 2.66595138445493, "learning_rate": 4.455380588874072e-06, "loss": 0.859, "step": 4550 }, { "epoch": 0.7, "grad_norm": 2.5302475002778815, "learning_rate": 4.451255561171199e-06, "loss": 0.8463, "step": 4551 }, { "epoch": 0.7, "grad_norm": 2.828214551033672, "learning_rate": 4.447131897194089e-06, "loss": 0.8653, "step": 4552 }, { "epoch": 0.7, "grad_norm": 2.577556922731538, "learning_rate": 4.443009597956219e-06, "loss": 0.8231, "step": 4553 }, { "epoch": 0.7, "grad_norm": 2.616616110701205, "learning_rate": 4.4388886644707415e-06, "loss": 0.8405, "step": 4554 }, { "epoch": 0.7, "grad_norm": 2.6355056781615405, "learning_rate": 4.434769097750463e-06, "loss": 0.9045, "step": 4555 }, { "epoch": 0.7, "grad_norm": 2.464200613183843, "learning_rate": 4.4306508988078545e-06, "loss": 0.7632, "step": 4556 }, { "epoch": 0.7, "grad_norm": 2.5987579746298515, "learning_rate": 4.42653406865506e-06, "loss": 0.7914, "step": 4557 }, { "epoch": 0.7, "grad_norm": 3.396848709039805, "learning_rate": 4.422418608303878e-06, "loss": 0.8756, "step": 4558 }, { "epoch": 0.7, "grad_norm": 2.564281271524151, "learning_rate": 4.418304518765768e-06, "loss": 0.766, "step": 4559 }, { "epoch": 0.7, "grad_norm": 2.893019951497073, "learning_rate": 4.414191801051868e-06, "loss": 0.9191, "step": 4560 }, { "epoch": 0.7, "grad_norm": 2.643477472378628, "learning_rate": 4.410080456172963e-06, "loss": 0.7728, "step": 4561 }, { "epoch": 0.7, "grad_norm": 3.191959381783351, "learning_rate": 4.4059704851395066e-06, "loss": 0.7745, "step": 4562 }, { "epoch": 0.7, "grad_norm": 2.7803203795034017, "learning_rate": 4.401861888961614e-06, "loss": 0.8424, "step": 4563 }, { "epoch": 0.7, "grad_norm": 2.6668926762762557, "learning_rate": 4.39775466864906e-06, "loss": 0.8434, "step": 4564 }, { "epoch": 0.7, "grad_norm": 3.2259726953810026, "learning_rate": 4.393648825211292e-06, "loss": 0.8012, "step": 4565 }, { "epoch": 0.7, "grad_norm": 2.485281211765713, "learning_rate": 4.389544359657406e-06, "loss": 0.7123, "step": 4566 }, { "epoch": 0.7, "grad_norm": 2.7361707651748524, "learning_rate": 4.385441272996163e-06, "loss": 0.8163, "step": 4567 }, { "epoch": 0.7, "grad_norm": 2.6342215617089617, "learning_rate": 4.381339566235991e-06, "loss": 0.7742, "step": 4568 }, { "epoch": 0.7, "grad_norm": 3.4953478324533265, "learning_rate": 4.3772392403849725e-06, "loss": 0.8489, "step": 4569 }, { "epoch": 0.7, "grad_norm": 2.8254277012049007, "learning_rate": 4.373140296450849e-06, "loss": 0.7949, "step": 4570 }, { "epoch": 0.7, "grad_norm": 2.447166580679297, "learning_rate": 4.369042735441034e-06, "loss": 0.6744, "step": 4571 }, { "epoch": 0.7, "grad_norm": 2.5148934014001267, "learning_rate": 4.364946558362587e-06, "loss": 0.7639, "step": 4572 }, { "epoch": 0.7, "grad_norm": 2.631815990263399, "learning_rate": 4.360851766222236e-06, "loss": 0.7963, "step": 4573 }, { "epoch": 0.7, "grad_norm": 2.7870802311280447, "learning_rate": 4.356758360026364e-06, "loss": 0.7633, "step": 4574 }, { "epoch": 0.7, "grad_norm": 2.6983783957529743, "learning_rate": 4.352666340781014e-06, "loss": 0.7501, "step": 4575 }, { "epoch": 0.7, "grad_norm": 2.7473802269485996, "learning_rate": 4.348575709491895e-06, "loss": 0.7177, "step": 4576 }, { "epoch": 0.7, "grad_norm": 2.7337053133225835, "learning_rate": 4.344486467164367e-06, "loss": 0.8403, "step": 4577 }, { "epoch": 0.7, "grad_norm": 2.5316406942155516, "learning_rate": 4.340398614803446e-06, "loss": 0.7514, "step": 4578 }, { "epoch": 0.7, "grad_norm": 2.616088844833526, "learning_rate": 4.336312153413821e-06, "loss": 0.735, "step": 4579 }, { "epoch": 0.7, "grad_norm": 2.5590860827417563, "learning_rate": 4.332227083999824e-06, "loss": 0.7324, "step": 4580 }, { "epoch": 0.7, "grad_norm": 2.823463575405153, "learning_rate": 4.328143407565446e-06, "loss": 0.8872, "step": 4581 }, { "epoch": 0.7, "grad_norm": 2.5845287530717598, "learning_rate": 4.3240611251143504e-06, "loss": 0.8051, "step": 4582 }, { "epoch": 0.7, "grad_norm": 2.8261552773288603, "learning_rate": 4.319980237649842e-06, "loss": 0.8085, "step": 4583 }, { "epoch": 0.7, "grad_norm": 2.281298269414887, "learning_rate": 4.315900746174891e-06, "loss": 0.7678, "step": 4584 }, { "epoch": 0.7, "grad_norm": 2.7115424664869425, "learning_rate": 4.311822651692117e-06, "loss": 0.8394, "step": 4585 }, { "epoch": 0.7, "grad_norm": 2.7292516435578658, "learning_rate": 4.307745955203802e-06, "loss": 0.7851, "step": 4586 }, { "epoch": 0.7, "grad_norm": 2.867908725524442, "learning_rate": 4.30367065771189e-06, "loss": 0.9014, "step": 4587 }, { "epoch": 0.7, "grad_norm": 2.539392293579164, "learning_rate": 4.299596760217971e-06, "loss": 0.9133, "step": 4588 }, { "epoch": 0.7, "grad_norm": 2.595803782835126, "learning_rate": 4.295524263723289e-06, "loss": 0.8473, "step": 4589 }, { "epoch": 0.7, "grad_norm": 4.051305067876277, "learning_rate": 4.291453169228761e-06, "loss": 0.953, "step": 4590 }, { "epoch": 0.7, "grad_norm": 2.556206565324167, "learning_rate": 4.287383477734941e-06, "loss": 0.767, "step": 4591 }, { "epoch": 0.7, "grad_norm": 3.4954943318160723, "learning_rate": 4.283315190242043e-06, "loss": 0.8481, "step": 4592 }, { "epoch": 0.7, "grad_norm": 2.58254843562837, "learning_rate": 4.279248307749942e-06, "loss": 0.7255, "step": 4593 }, { "epoch": 0.7, "grad_norm": 2.438739075744588, "learning_rate": 4.275182831258165e-06, "loss": 0.7969, "step": 4594 }, { "epoch": 0.7, "grad_norm": 2.7256701612456022, "learning_rate": 4.2711187617658874e-06, "loss": 0.7933, "step": 4595 }, { "epoch": 0.7, "grad_norm": 2.501946269627395, "learning_rate": 4.2670561002719465e-06, "loss": 0.7289, "step": 4596 }, { "epoch": 0.7, "grad_norm": 2.6861185679974433, "learning_rate": 4.262994847774826e-06, "loss": 0.7463, "step": 4597 }, { "epoch": 0.7, "grad_norm": 3.6513030959735073, "learning_rate": 4.258935005272677e-06, "loss": 0.86, "step": 4598 }, { "epoch": 0.7, "grad_norm": 2.5170003857043337, "learning_rate": 4.254876573763287e-06, "loss": 0.8417, "step": 4599 }, { "epoch": 0.7, "grad_norm": 2.5795342270973713, "learning_rate": 4.250819554244104e-06, "loss": 0.7703, "step": 4600 }, { "epoch": 0.7, "grad_norm": 2.2948253468316304, "learning_rate": 4.2467639477122365e-06, "loss": 0.7226, "step": 4601 }, { "epoch": 0.7, "grad_norm": 2.426464752391824, "learning_rate": 4.242709755164436e-06, "loss": 0.7803, "step": 4602 }, { "epoch": 0.7, "grad_norm": 2.542434876466958, "learning_rate": 4.238656977597104e-06, "loss": 0.7254, "step": 4603 }, { "epoch": 0.7, "grad_norm": 2.6932781263047523, "learning_rate": 4.234605616006309e-06, "loss": 0.8503, "step": 4604 }, { "epoch": 0.7, "grad_norm": 2.596101290817688, "learning_rate": 4.230555671387752e-06, "loss": 0.7327, "step": 4605 }, { "epoch": 0.71, "grad_norm": 2.517592945930184, "learning_rate": 4.2265071447368075e-06, "loss": 0.7414, "step": 4606 }, { "epoch": 0.71, "grad_norm": 2.6225552220576747, "learning_rate": 4.222460037048481e-06, "loss": 0.7787, "step": 4607 }, { "epoch": 0.71, "grad_norm": 3.682718897322715, "learning_rate": 4.218414349317435e-06, "loss": 0.8763, "step": 4608 }, { "epoch": 0.71, "grad_norm": 2.539578696281544, "learning_rate": 4.214370082537996e-06, "loss": 0.7671, "step": 4609 }, { "epoch": 0.71, "grad_norm": 2.4552266766915336, "learning_rate": 4.210327237704127e-06, "loss": 0.7058, "step": 4610 }, { "epoch": 0.71, "grad_norm": 2.450334006455967, "learning_rate": 4.206285815809442e-06, "loss": 0.7708, "step": 4611 }, { "epoch": 0.71, "grad_norm": 2.5675380169728568, "learning_rate": 4.202245817847216e-06, "loss": 0.7293, "step": 4612 }, { "epoch": 0.71, "grad_norm": 2.8543763293592983, "learning_rate": 4.198207244810359e-06, "loss": 0.7996, "step": 4613 }, { "epoch": 0.71, "grad_norm": 2.5847582955676844, "learning_rate": 4.194170097691449e-06, "loss": 0.8363, "step": 4614 }, { "epoch": 0.71, "grad_norm": 2.607985959941781, "learning_rate": 4.190134377482697e-06, "loss": 0.8314, "step": 4615 }, { "epoch": 0.71, "grad_norm": 2.5181490067330885, "learning_rate": 4.186100085175969e-06, "loss": 0.7393, "step": 4616 }, { "epoch": 0.71, "grad_norm": 3.7186874436591073, "learning_rate": 4.182067221762787e-06, "loss": 0.7471, "step": 4617 }, { "epoch": 0.71, "grad_norm": 2.631842047505717, "learning_rate": 4.17803578823431e-06, "loss": 0.8025, "step": 4618 }, { "epoch": 0.71, "grad_norm": 2.681078894285222, "learning_rate": 4.174005785581355e-06, "loss": 0.8648, "step": 4619 }, { "epoch": 0.71, "grad_norm": 2.5036096911340064, "learning_rate": 4.169977214794381e-06, "loss": 0.7002, "step": 4620 }, { "epoch": 0.71, "grad_norm": 2.4533717993242927, "learning_rate": 4.165950076863498e-06, "loss": 0.7863, "step": 4621 }, { "epoch": 0.71, "grad_norm": 2.5486747992946803, "learning_rate": 4.161924372778461e-06, "loss": 0.8162, "step": 4622 }, { "epoch": 0.71, "grad_norm": 2.8385199190995167, "learning_rate": 4.157900103528681e-06, "loss": 0.826, "step": 4623 }, { "epoch": 0.71, "grad_norm": 2.652735195152087, "learning_rate": 4.153877270103205e-06, "loss": 0.8017, "step": 4624 }, { "epoch": 0.71, "grad_norm": 2.8035117018749363, "learning_rate": 4.149855873490739e-06, "loss": 0.7838, "step": 4625 }, { "epoch": 0.71, "grad_norm": 2.8177744943240155, "learning_rate": 4.145835914679624e-06, "loss": 0.7602, "step": 4626 }, { "epoch": 0.71, "grad_norm": 2.702208093817233, "learning_rate": 4.141817394657854e-06, "loss": 0.7792, "step": 4627 }, { "epoch": 0.71, "grad_norm": 2.4106613142451736, "learning_rate": 4.137800314413072e-06, "loss": 0.7998, "step": 4628 }, { "epoch": 0.71, "grad_norm": 2.4824405757189627, "learning_rate": 4.1337846749325615e-06, "loss": 0.7364, "step": 4629 }, { "epoch": 0.71, "grad_norm": 2.422022193044217, "learning_rate": 4.129770477203253e-06, "loss": 0.7391, "step": 4630 }, { "epoch": 0.71, "grad_norm": 2.391710828345458, "learning_rate": 4.125757722211725e-06, "loss": 0.751, "step": 4631 }, { "epoch": 0.71, "grad_norm": 2.650588884023421, "learning_rate": 4.1217464109442e-06, "loss": 0.7595, "step": 4632 }, { "epoch": 0.71, "grad_norm": 2.5578125265610416, "learning_rate": 4.11773654438654e-06, "loss": 0.8018, "step": 4633 }, { "epoch": 0.71, "grad_norm": 2.728508450144264, "learning_rate": 4.113728123524269e-06, "loss": 0.7575, "step": 4634 }, { "epoch": 0.71, "grad_norm": 2.8877626647567696, "learning_rate": 4.1097211493425335e-06, "loss": 0.8282, "step": 4635 }, { "epoch": 0.71, "grad_norm": 2.6971660585681816, "learning_rate": 4.105715622826144e-06, "loss": 0.8925, "step": 4636 }, { "epoch": 0.71, "grad_norm": 3.6891685662456877, "learning_rate": 4.101711544959544e-06, "loss": 0.8454, "step": 4637 }, { "epoch": 0.71, "grad_norm": 2.6052488039847974, "learning_rate": 4.097708916726818e-06, "loss": 0.7644, "step": 4638 }, { "epoch": 0.71, "grad_norm": 2.500823823477732, "learning_rate": 4.093707739111709e-06, "loss": 0.7763, "step": 4639 }, { "epoch": 0.71, "grad_norm": 3.1481662271475703, "learning_rate": 4.0897080130975885e-06, "loss": 0.7999, "step": 4640 }, { "epoch": 0.71, "grad_norm": 2.3819445548136193, "learning_rate": 4.0857097396674785e-06, "loss": 0.6348, "step": 4641 }, { "epoch": 0.71, "grad_norm": 3.0016153917782913, "learning_rate": 4.081712919804042e-06, "loss": 0.8293, "step": 4642 }, { "epoch": 0.71, "grad_norm": 3.361916696935901, "learning_rate": 4.077717554489585e-06, "loss": 0.8334, "step": 4643 }, { "epoch": 0.71, "grad_norm": 3.0327023999462233, "learning_rate": 4.073723644706052e-06, "loss": 0.8209, "step": 4644 }, { "epoch": 0.71, "grad_norm": 2.7807531192439456, "learning_rate": 4.069731191435043e-06, "loss": 0.83, "step": 4645 }, { "epoch": 0.71, "grad_norm": 2.6526815820931104, "learning_rate": 4.065740195657784e-06, "loss": 0.7878, "step": 4646 }, { "epoch": 0.71, "grad_norm": 2.441045340728361, "learning_rate": 4.061750658355154e-06, "loss": 0.7812, "step": 4647 }, { "epoch": 0.71, "grad_norm": 2.6057736597483467, "learning_rate": 4.057762580507669e-06, "loss": 0.8392, "step": 4648 }, { "epoch": 0.71, "grad_norm": 2.5718849496433167, "learning_rate": 4.053775963095482e-06, "loss": 0.7951, "step": 4649 }, { "epoch": 0.71, "grad_norm": 2.9414300092669694, "learning_rate": 4.049790807098398e-06, "loss": 0.7309, "step": 4650 }, { "epoch": 0.71, "grad_norm": 2.6655740741989957, "learning_rate": 4.045807113495854e-06, "loss": 0.7683, "step": 4651 }, { "epoch": 0.71, "grad_norm": 2.620533608701822, "learning_rate": 4.04182488326693e-06, "loss": 0.7355, "step": 4652 }, { "epoch": 0.71, "grad_norm": 2.6881651036970546, "learning_rate": 4.037844117390346e-06, "loss": 0.7604, "step": 4653 }, { "epoch": 0.71, "grad_norm": 3.1868902215429236, "learning_rate": 4.033864816844464e-06, "loss": 0.8145, "step": 4654 }, { "epoch": 0.71, "grad_norm": 2.596476042767454, "learning_rate": 4.029886982607278e-06, "loss": 0.7789, "step": 4655 }, { "epoch": 0.71, "grad_norm": 2.479689038232687, "learning_rate": 4.025910615656439e-06, "loss": 0.7502, "step": 4656 }, { "epoch": 0.71, "grad_norm": 4.330613762856095, "learning_rate": 4.021935716969217e-06, "loss": 0.9225, "step": 4657 }, { "epoch": 0.71, "grad_norm": 2.4228114155369878, "learning_rate": 4.017962287522538e-06, "loss": 0.7204, "step": 4658 }, { "epoch": 0.71, "grad_norm": 2.4409170638890645, "learning_rate": 4.013990328292956e-06, "loss": 0.7502, "step": 4659 }, { "epoch": 0.71, "grad_norm": 2.702036399975697, "learning_rate": 4.010019840256665e-06, "loss": 0.7252, "step": 4660 }, { "epoch": 0.71, "grad_norm": 2.7340055852492298, "learning_rate": 4.006050824389504e-06, "loss": 0.7979, "step": 4661 }, { "epoch": 0.71, "grad_norm": 2.6003021686125054, "learning_rate": 4.002083281666944e-06, "loss": 0.8506, "step": 4662 }, { "epoch": 0.71, "grad_norm": 2.7024305304131317, "learning_rate": 3.998117213064096e-06, "loss": 0.775, "step": 4663 }, { "epoch": 0.71, "grad_norm": 2.3818260809227723, "learning_rate": 3.994152619555708e-06, "loss": 0.7644, "step": 4664 }, { "epoch": 0.71, "grad_norm": 2.4210541149434954, "learning_rate": 3.990189502116165e-06, "loss": 0.6938, "step": 4665 }, { "epoch": 0.71, "grad_norm": 2.8001810887021468, "learning_rate": 3.986227861719489e-06, "loss": 0.7897, "step": 4666 }, { "epoch": 0.71, "grad_norm": 2.638390741878354, "learning_rate": 3.982267699339344e-06, "loss": 0.8287, "step": 4667 }, { "epoch": 0.71, "grad_norm": 2.5404188437109227, "learning_rate": 3.978309015949022e-06, "loss": 0.7848, "step": 4668 }, { "epoch": 0.71, "grad_norm": 2.725734600082859, "learning_rate": 3.974351812521462e-06, "loss": 0.8366, "step": 4669 }, { "epoch": 0.71, "grad_norm": 2.6029752762499974, "learning_rate": 3.970396090029231e-06, "loss": 0.8097, "step": 4670 }, { "epoch": 0.71, "grad_norm": 2.5952620908186947, "learning_rate": 3.9664418494445325e-06, "loss": 0.8636, "step": 4671 }, { "epoch": 0.72, "grad_norm": 2.7985463975433755, "learning_rate": 3.9624890917392125e-06, "loss": 0.877, "step": 4672 }, { "epoch": 0.72, "grad_norm": 3.290152520574593, "learning_rate": 3.958537817884744e-06, "loss": 0.8129, "step": 4673 }, { "epoch": 0.72, "grad_norm": 2.6227573346481288, "learning_rate": 3.9545880288522435e-06, "loss": 0.78, "step": 4674 }, { "epoch": 0.72, "grad_norm": 3.245561388143318, "learning_rate": 3.950639725612453e-06, "loss": 0.827, "step": 4675 }, { "epoch": 0.72, "grad_norm": 2.6497248469105936, "learning_rate": 3.9466929091357585e-06, "loss": 0.8241, "step": 4676 }, { "epoch": 0.72, "grad_norm": 2.7854720270745683, "learning_rate": 3.942747580392172e-06, "loss": 0.76, "step": 4677 }, { "epoch": 0.72, "grad_norm": 6.453180020069248, "learning_rate": 3.938803740351351e-06, "loss": 0.8078, "step": 4678 }, { "epoch": 0.72, "grad_norm": 2.537980143611081, "learning_rate": 3.934861389982576e-06, "loss": 0.779, "step": 4679 }, { "epoch": 0.72, "grad_norm": 3.012175652906596, "learning_rate": 3.93092053025477e-06, "loss": 0.7289, "step": 4680 }, { "epoch": 0.72, "grad_norm": 2.5317863879725326, "learning_rate": 3.926981162136485e-06, "loss": 0.703, "step": 4681 }, { "epoch": 0.72, "grad_norm": 2.8787146878385266, "learning_rate": 3.923043286595902e-06, "loss": 0.7585, "step": 4682 }, { "epoch": 0.72, "grad_norm": 2.7757103864171007, "learning_rate": 3.919106904600849e-06, "loss": 0.8586, "step": 4683 }, { "epoch": 0.72, "grad_norm": 2.675438143895143, "learning_rate": 3.915172017118773e-06, "loss": 0.7801, "step": 4684 }, { "epoch": 0.72, "grad_norm": 2.523180775382848, "learning_rate": 3.91123862511676e-06, "loss": 0.6941, "step": 4685 }, { "epoch": 0.72, "grad_norm": 2.5315978159852053, "learning_rate": 3.907306729561528e-06, "loss": 0.8646, "step": 4686 }, { "epoch": 0.72, "grad_norm": 2.519907492637914, "learning_rate": 3.903376331419421e-06, "loss": 0.6856, "step": 4687 }, { "epoch": 0.72, "grad_norm": 2.5523043949939592, "learning_rate": 3.89944743165643e-06, "loss": 0.77, "step": 4688 }, { "epoch": 0.72, "grad_norm": 2.5257770082553246, "learning_rate": 3.895520031238163e-06, "loss": 0.7387, "step": 4689 }, { "epoch": 0.72, "grad_norm": 2.7803624676951646, "learning_rate": 3.891594131129863e-06, "loss": 0.8689, "step": 4690 }, { "epoch": 0.72, "grad_norm": 2.652024181046161, "learning_rate": 3.8876697322964115e-06, "loss": 0.8326, "step": 4691 }, { "epoch": 0.72, "grad_norm": 2.517253856703637, "learning_rate": 3.883746835702314e-06, "loss": 0.6858, "step": 4692 }, { "epoch": 0.72, "grad_norm": 2.472269564693469, "learning_rate": 3.879825442311704e-06, "loss": 0.7663, "step": 4693 }, { "epoch": 0.72, "grad_norm": 2.6853471906503454, "learning_rate": 3.875905553088357e-06, "loss": 0.7294, "step": 4694 }, { "epoch": 0.72, "grad_norm": 2.796561653655558, "learning_rate": 3.871987168995668e-06, "loss": 0.8129, "step": 4695 }, { "epoch": 0.72, "grad_norm": 2.6928289478681053, "learning_rate": 3.868070290996666e-06, "loss": 0.8477, "step": 4696 }, { "epoch": 0.72, "grad_norm": 2.84870512570839, "learning_rate": 3.86415492005401e-06, "loss": 0.8197, "step": 4697 }, { "epoch": 0.72, "grad_norm": 2.739562097828992, "learning_rate": 3.860241057129987e-06, "loss": 0.7791, "step": 4698 }, { "epoch": 0.72, "grad_norm": 2.897960792207024, "learning_rate": 3.856328703186517e-06, "loss": 0.8865, "step": 4699 }, { "epoch": 0.72, "grad_norm": 2.3354076946120386, "learning_rate": 3.852417859185148e-06, "loss": 0.6997, "step": 4700 }, { "epoch": 0.72, "grad_norm": 2.4224047577876915, "learning_rate": 3.848508526087049e-06, "loss": 0.7737, "step": 4701 }, { "epoch": 0.72, "grad_norm": 2.7918517638326215, "learning_rate": 3.844600704853033e-06, "loss": 0.7162, "step": 4702 }, { "epoch": 0.72, "grad_norm": 2.8138739928062635, "learning_rate": 3.840694396443529e-06, "loss": 0.8866, "step": 4703 }, { "epoch": 0.72, "grad_norm": 2.456079097072732, "learning_rate": 3.836789601818596e-06, "loss": 0.8912, "step": 4704 }, { "epoch": 0.72, "grad_norm": 2.6774767199549454, "learning_rate": 3.832886321937927e-06, "loss": 0.8849, "step": 4705 }, { "epoch": 0.72, "grad_norm": 2.5867290705841226, "learning_rate": 3.828984557760838e-06, "loss": 0.7677, "step": 4706 }, { "epoch": 0.72, "grad_norm": 2.547855076935108, "learning_rate": 3.825084310246271e-06, "loss": 0.7806, "step": 4707 }, { "epoch": 0.72, "grad_norm": 2.373974068705608, "learning_rate": 3.8211855803528e-06, "loss": 0.7564, "step": 4708 }, { "epoch": 0.72, "grad_norm": 2.5217411177994697, "learning_rate": 3.817288369038617e-06, "loss": 0.8766, "step": 4709 }, { "epoch": 0.72, "grad_norm": 2.5738411725657246, "learning_rate": 3.813392677261557e-06, "loss": 0.8039, "step": 4710 }, { "epoch": 0.72, "grad_norm": 2.7923442043533715, "learning_rate": 3.8094985059790656e-06, "loss": 0.7569, "step": 4711 }, { "epoch": 0.72, "grad_norm": 2.4916064610001065, "learning_rate": 3.8056058561482203e-06, "loss": 0.8459, "step": 4712 }, { "epoch": 0.72, "grad_norm": 2.6814032775087977, "learning_rate": 3.8017147287257294e-06, "loss": 0.7843, "step": 4713 }, { "epoch": 0.72, "grad_norm": 2.7524991249628963, "learning_rate": 3.7978251246679223e-06, "loss": 0.7788, "step": 4714 }, { "epoch": 0.72, "grad_norm": 3.2629708574261396, "learning_rate": 3.7939370449307477e-06, "loss": 0.8294, "step": 4715 }, { "epoch": 0.72, "grad_norm": 2.4710880212646478, "learning_rate": 3.7900504904697964e-06, "loss": 0.8517, "step": 4716 }, { "epoch": 0.72, "grad_norm": 2.693293620784226, "learning_rate": 3.78616546224027e-06, "loss": 0.8109, "step": 4717 }, { "epoch": 0.72, "grad_norm": 3.02761216391464, "learning_rate": 3.782281961197e-06, "loss": 0.8265, "step": 4718 }, { "epoch": 0.72, "grad_norm": 2.646484955340347, "learning_rate": 3.77839998829444e-06, "loss": 0.7394, "step": 4719 }, { "epoch": 0.72, "grad_norm": 2.4578067727484534, "learning_rate": 3.774519544486669e-06, "loss": 0.8447, "step": 4720 }, { "epoch": 0.72, "grad_norm": 2.6699138109227407, "learning_rate": 3.7706406307273978e-06, "loss": 0.8107, "step": 4721 }, { "epoch": 0.72, "grad_norm": 2.630772578360136, "learning_rate": 3.76676324796995e-06, "loss": 0.7524, "step": 4722 }, { "epoch": 0.72, "grad_norm": 2.7318104679393693, "learning_rate": 3.762887397167274e-06, "loss": 0.7799, "step": 4723 }, { "epoch": 0.72, "grad_norm": 2.762270216428474, "learning_rate": 3.759013079271955e-06, "loss": 0.8176, "step": 4724 }, { "epoch": 0.72, "grad_norm": 2.4553305532030305, "learning_rate": 3.7551402952361837e-06, "loss": 0.728, "step": 4725 }, { "epoch": 0.72, "grad_norm": 2.6450089954589697, "learning_rate": 3.751269046011782e-06, "loss": 0.8179, "step": 4726 }, { "epoch": 0.72, "grad_norm": 2.343328022644456, "learning_rate": 3.7473993325502e-06, "loss": 0.789, "step": 4727 }, { "epoch": 0.72, "grad_norm": 3.9286072628824007, "learning_rate": 3.7435311558025013e-06, "loss": 0.9128, "step": 4728 }, { "epoch": 0.72, "grad_norm": 2.879903474320907, "learning_rate": 3.739664516719371e-06, "loss": 0.7496, "step": 4729 }, { "epoch": 0.72, "grad_norm": 2.4443020986868316, "learning_rate": 3.735799416251129e-06, "loss": 0.7422, "step": 4730 }, { "epoch": 0.72, "grad_norm": 2.7236021790499807, "learning_rate": 3.7319358553477036e-06, "loss": 0.6851, "step": 4731 }, { "epoch": 0.72, "grad_norm": 2.419018248039533, "learning_rate": 3.728073834958652e-06, "loss": 0.7411, "step": 4732 }, { "epoch": 0.72, "grad_norm": 3.024391110209093, "learning_rate": 3.7242133560331474e-06, "loss": 0.7883, "step": 4733 }, { "epoch": 0.72, "grad_norm": 2.9023134680031646, "learning_rate": 3.720354419519986e-06, "loss": 0.768, "step": 4734 }, { "epoch": 0.72, "grad_norm": 2.576821852078959, "learning_rate": 3.7164970263675927e-06, "loss": 0.65, "step": 4735 }, { "epoch": 0.72, "grad_norm": 2.59943955832557, "learning_rate": 3.7126411775240034e-06, "loss": 0.8005, "step": 4736 }, { "epoch": 0.73, "grad_norm": 2.733894188548558, "learning_rate": 3.7087868739368727e-06, "loss": 0.7918, "step": 4737 }, { "epoch": 0.73, "grad_norm": 2.5152087590772974, "learning_rate": 3.704934116553488e-06, "loss": 0.7243, "step": 4738 }, { "epoch": 0.73, "grad_norm": 2.270981881776336, "learning_rate": 3.7010829063207464e-06, "loss": 0.6764, "step": 4739 }, { "epoch": 0.73, "grad_norm": 3.092183390138597, "learning_rate": 3.6972332441851633e-06, "loss": 0.8128, "step": 4740 }, { "epoch": 0.73, "grad_norm": 2.6356895097666913, "learning_rate": 3.6933851310928835e-06, "loss": 0.7741, "step": 4741 }, { "epoch": 0.73, "grad_norm": 2.5035855263563644, "learning_rate": 3.6895385679896634e-06, "loss": 0.7324, "step": 4742 }, { "epoch": 0.73, "grad_norm": 2.709479031414277, "learning_rate": 3.6856935558208805e-06, "loss": 0.7627, "step": 4743 }, { "epoch": 0.73, "grad_norm": 2.536976789978986, "learning_rate": 3.6818500955315295e-06, "loss": 0.811, "step": 4744 }, { "epoch": 0.73, "grad_norm": 2.613504187688428, "learning_rate": 3.678008188066222e-06, "loss": 0.794, "step": 4745 }, { "epoch": 0.73, "grad_norm": 2.6157031627687535, "learning_rate": 3.6741678343691987e-06, "loss": 0.8401, "step": 4746 }, { "epoch": 0.73, "grad_norm": 2.667752066641231, "learning_rate": 3.670329035384308e-06, "loss": 0.7574, "step": 4747 }, { "epoch": 0.73, "grad_norm": 2.542434226381904, "learning_rate": 3.6664917920550138e-06, "loss": 0.6986, "step": 4748 }, { "epoch": 0.73, "grad_norm": 2.6856041087077585, "learning_rate": 3.6626561053244102e-06, "loss": 0.9379, "step": 4749 }, { "epoch": 0.73, "grad_norm": 2.4464421333190356, "learning_rate": 3.6588219761351997e-06, "loss": 0.765, "step": 4750 }, { "epoch": 0.73, "grad_norm": 2.4903426439491416, "learning_rate": 3.6549894054296987e-06, "loss": 0.751, "step": 4751 }, { "epoch": 0.73, "grad_norm": 2.7515001060158353, "learning_rate": 3.651158394149852e-06, "loss": 0.7518, "step": 4752 }, { "epoch": 0.73, "grad_norm": 3.0337445733466644, "learning_rate": 3.6473289432372127e-06, "loss": 0.7475, "step": 4753 }, { "epoch": 0.73, "grad_norm": 2.395247071177547, "learning_rate": 3.643501053632952e-06, "loss": 0.6996, "step": 4754 }, { "epoch": 0.73, "grad_norm": 2.489066127142281, "learning_rate": 3.6396747262778565e-06, "loss": 0.7531, "step": 4755 }, { "epoch": 0.73, "grad_norm": 2.840828936358385, "learning_rate": 3.635849962112329e-06, "loss": 0.7807, "step": 4756 }, { "epoch": 0.73, "grad_norm": 3.4415939387484573, "learning_rate": 3.6320267620763948e-06, "loss": 0.8277, "step": 4757 }, { "epoch": 0.73, "grad_norm": 2.7229664144214722, "learning_rate": 3.628205127109685e-06, "loss": 0.8241, "step": 4758 }, { "epoch": 0.73, "grad_norm": 2.5781982638832446, "learning_rate": 3.6243850581514497e-06, "loss": 0.825, "step": 4759 }, { "epoch": 0.73, "grad_norm": 2.5764834094870714, "learning_rate": 3.620566556140558e-06, "loss": 0.8209, "step": 4760 }, { "epoch": 0.73, "grad_norm": 2.68885570646689, "learning_rate": 3.616749622015486e-06, "loss": 0.6946, "step": 4761 }, { "epoch": 0.73, "grad_norm": 2.4970988849394127, "learning_rate": 3.6129342567143354e-06, "loss": 0.7678, "step": 4762 }, { "epoch": 0.73, "grad_norm": 2.6849650774427007, "learning_rate": 3.609120461174813e-06, "loss": 0.7518, "step": 4763 }, { "epoch": 0.73, "grad_norm": 2.6180254178226208, "learning_rate": 3.6053082363342416e-06, "loss": 0.7673, "step": 4764 }, { "epoch": 0.73, "grad_norm": 2.7240023181640893, "learning_rate": 3.601497583129561e-06, "loss": 0.8813, "step": 4765 }, { "epoch": 0.73, "grad_norm": 2.4256243336055787, "learning_rate": 3.5976885024973196e-06, "loss": 0.7399, "step": 4766 }, { "epoch": 0.73, "grad_norm": 2.5106095364830523, "learning_rate": 3.593880995373683e-06, "loss": 0.6945, "step": 4767 }, { "epoch": 0.73, "grad_norm": 3.789038453137751, "learning_rate": 3.5900750626944346e-06, "loss": 0.8602, "step": 4768 }, { "epoch": 0.73, "grad_norm": 2.7024293451721766, "learning_rate": 3.586270705394962e-06, "loss": 0.8042, "step": 4769 }, { "epoch": 0.73, "grad_norm": 2.5976861428370888, "learning_rate": 3.5824679244102677e-06, "loss": 0.7232, "step": 4770 }, { "epoch": 0.73, "grad_norm": 2.71288602620368, "learning_rate": 3.5786667206749736e-06, "loss": 0.8084, "step": 4771 }, { "epoch": 0.73, "grad_norm": 2.6366829492883608, "learning_rate": 3.5748670951233043e-06, "loss": 0.7183, "step": 4772 }, { "epoch": 0.73, "grad_norm": 2.804276556323464, "learning_rate": 3.5710690486891066e-06, "loss": 0.8149, "step": 4773 }, { "epoch": 0.73, "grad_norm": 2.6738106485757993, "learning_rate": 3.5672725823058297e-06, "loss": 0.8348, "step": 4774 }, { "epoch": 0.73, "grad_norm": 2.6069776651382854, "learning_rate": 3.56347769690654e-06, "loss": 0.713, "step": 4775 }, { "epoch": 0.73, "grad_norm": 2.6979836985747507, "learning_rate": 3.5596843934239133e-06, "loss": 0.7818, "step": 4776 }, { "epoch": 0.73, "grad_norm": 2.7598334527062054, "learning_rate": 3.5558926727902366e-06, "loss": 0.8516, "step": 4777 }, { "epoch": 0.73, "grad_norm": 3.3131812110524224, "learning_rate": 3.5521025359374074e-06, "loss": 0.8712, "step": 4778 }, { "epoch": 0.73, "grad_norm": 2.4949648400791204, "learning_rate": 3.548313983796938e-06, "loss": 0.7782, "step": 4779 }, { "epoch": 0.73, "grad_norm": 2.5630884401307656, "learning_rate": 3.544527017299949e-06, "loss": 0.8788, "step": 4780 }, { "epoch": 0.73, "grad_norm": 2.911939959801433, "learning_rate": 3.5407416373771643e-06, "loss": 0.8761, "step": 4781 }, { "epoch": 0.73, "grad_norm": 2.742301645711473, "learning_rate": 3.5369578449589325e-06, "loss": 0.7713, "step": 4782 }, { "epoch": 0.73, "grad_norm": 2.418614212609283, "learning_rate": 3.533175640975196e-06, "loss": 0.7763, "step": 4783 }, { "epoch": 0.73, "grad_norm": 3.0920357376682004, "learning_rate": 3.529395026355521e-06, "loss": 0.8437, "step": 4784 }, { "epoch": 0.73, "grad_norm": 2.458293186069364, "learning_rate": 3.525616002029073e-06, "loss": 0.7363, "step": 4785 }, { "epoch": 0.73, "grad_norm": 2.8596590025593063, "learning_rate": 3.5218385689246326e-06, "loss": 0.8724, "step": 4786 }, { "epoch": 0.73, "grad_norm": 2.9193255631234507, "learning_rate": 3.5180627279705835e-06, "loss": 0.7714, "step": 4787 }, { "epoch": 0.73, "grad_norm": 2.79476377638554, "learning_rate": 3.514288480094924e-06, "loss": 0.8012, "step": 4788 }, { "epoch": 0.73, "grad_norm": 2.6235771755809973, "learning_rate": 3.5105158262252537e-06, "loss": 0.8237, "step": 4789 }, { "epoch": 0.73, "grad_norm": 2.5787053452761564, "learning_rate": 3.506744767288792e-06, "loss": 0.7667, "step": 4790 }, { "epoch": 0.73, "grad_norm": 2.6403708336838543, "learning_rate": 3.502975304212357e-06, "loss": 0.7564, "step": 4791 }, { "epoch": 0.73, "grad_norm": 2.637237157385171, "learning_rate": 3.499207437922373e-06, "loss": 0.73, "step": 4792 }, { "epoch": 0.73, "grad_norm": 2.5619777184569874, "learning_rate": 3.495441169344882e-06, "loss": 0.8118, "step": 4793 }, { "epoch": 0.73, "grad_norm": 2.688433925814969, "learning_rate": 3.491676499405522e-06, "loss": 0.7487, "step": 4794 }, { "epoch": 0.73, "grad_norm": 2.603657981617799, "learning_rate": 3.487913429029548e-06, "loss": 0.8036, "step": 4795 }, { "epoch": 0.73, "grad_norm": 2.876068212142599, "learning_rate": 3.484151959141815e-06, "loss": 0.8381, "step": 4796 }, { "epoch": 0.73, "grad_norm": 2.7161938139739186, "learning_rate": 3.4803920906667865e-06, "loss": 0.7448, "step": 4797 }, { "epoch": 0.73, "grad_norm": 2.4956209199321697, "learning_rate": 3.4766338245285335e-06, "loss": 0.6899, "step": 4798 }, { "epoch": 0.73, "grad_norm": 2.591375363966042, "learning_rate": 3.4728771616507317e-06, "loss": 0.7904, "step": 4799 }, { "epoch": 0.73, "grad_norm": 2.7401520195665023, "learning_rate": 3.46912210295666e-06, "loss": 0.7921, "step": 4800 }, { "epoch": 0.73, "grad_norm": 2.4448228135941834, "learning_rate": 3.4653686493692128e-06, "loss": 0.7103, "step": 4801 }, { "epoch": 0.74, "grad_norm": 2.962985582453866, "learning_rate": 3.461616801810882e-06, "loss": 0.7812, "step": 4802 }, { "epoch": 0.74, "grad_norm": 2.5877756856635648, "learning_rate": 3.457866561203761e-06, "loss": 0.7902, "step": 4803 }, { "epoch": 0.74, "grad_norm": 2.879005707932505, "learning_rate": 3.4541179284695624e-06, "loss": 0.8495, "step": 4804 }, { "epoch": 0.74, "grad_norm": 2.589230441682058, "learning_rate": 3.4503709045295874e-06, "loss": 0.7214, "step": 4805 }, { "epoch": 0.74, "grad_norm": 2.599459866748018, "learning_rate": 3.4466254903047558e-06, "loss": 0.7829, "step": 4806 }, { "epoch": 0.74, "grad_norm": 2.790768839452864, "learning_rate": 3.4428816867155835e-06, "loss": 0.7689, "step": 4807 }, { "epoch": 0.74, "grad_norm": 3.1540500764058215, "learning_rate": 3.43913949468219e-06, "loss": 0.8027, "step": 4808 }, { "epoch": 0.74, "grad_norm": 2.816907689724522, "learning_rate": 3.4353989151243027e-06, "loss": 0.7883, "step": 4809 }, { "epoch": 0.74, "grad_norm": 2.72704029894962, "learning_rate": 3.431659948961251e-06, "loss": 0.8028, "step": 4810 }, { "epoch": 0.74, "grad_norm": 2.583137093090374, "learning_rate": 3.4279225971119654e-06, "loss": 0.7438, "step": 4811 }, { "epoch": 0.74, "grad_norm": 2.4804854730280765, "learning_rate": 3.424186860494987e-06, "loss": 0.7215, "step": 4812 }, { "epoch": 0.74, "grad_norm": 2.658327666872961, "learning_rate": 3.4204527400284537e-06, "loss": 0.7776, "step": 4813 }, { "epoch": 0.74, "grad_norm": 2.693389347062505, "learning_rate": 3.4167202366301023e-06, "loss": 0.703, "step": 4814 }, { "epoch": 0.74, "grad_norm": 2.566126480384414, "learning_rate": 3.4129893512172853e-06, "loss": 0.8688, "step": 4815 }, { "epoch": 0.74, "grad_norm": 2.585160035850379, "learning_rate": 3.4092600847069423e-06, "loss": 0.6803, "step": 4816 }, { "epoch": 0.74, "grad_norm": 2.6303955696871135, "learning_rate": 3.4055324380156307e-06, "loss": 0.8045, "step": 4817 }, { "epoch": 0.74, "grad_norm": 2.4160626378845333, "learning_rate": 3.401806412059496e-06, "loss": 0.6905, "step": 4818 }, { "epoch": 0.74, "grad_norm": 2.441459359441314, "learning_rate": 3.398082007754292e-06, "loss": 0.7945, "step": 4819 }, { "epoch": 0.74, "grad_norm": 2.625386501191365, "learning_rate": 3.3943592260153734e-06, "loss": 0.7157, "step": 4820 }, { "epoch": 0.74, "grad_norm": 3.440194805471606, "learning_rate": 3.3906380677576946e-06, "loss": 0.8603, "step": 4821 }, { "epoch": 0.74, "grad_norm": 2.694702386247108, "learning_rate": 3.386918533895809e-06, "loss": 0.7045, "step": 4822 }, { "epoch": 0.74, "grad_norm": 3.1686746071454057, "learning_rate": 3.38320062534388e-06, "loss": 0.7979, "step": 4823 }, { "epoch": 0.74, "grad_norm": 2.4610000489643724, "learning_rate": 3.379484343015662e-06, "loss": 0.7281, "step": 4824 }, { "epoch": 0.74, "grad_norm": 2.880245536591487, "learning_rate": 3.3757696878245105e-06, "loss": 0.7813, "step": 4825 }, { "epoch": 0.74, "grad_norm": 2.5397694675753377, "learning_rate": 3.3720566606833893e-06, "loss": 0.7543, "step": 4826 }, { "epoch": 0.74, "grad_norm": 2.5502516158533943, "learning_rate": 3.3683452625048508e-06, "loss": 0.7224, "step": 4827 }, { "epoch": 0.74, "grad_norm": 2.6288489855836685, "learning_rate": 3.3646354942010574e-06, "loss": 0.8899, "step": 4828 }, { "epoch": 0.74, "grad_norm": 2.724002193740201, "learning_rate": 3.360927356683763e-06, "loss": 0.7548, "step": 4829 }, { "epoch": 0.74, "grad_norm": 2.6394485502907252, "learning_rate": 3.3572208508643254e-06, "loss": 0.8039, "step": 4830 }, { "epoch": 0.74, "grad_norm": 2.7206984933518865, "learning_rate": 3.3535159776536998e-06, "loss": 0.8256, "step": 4831 }, { "epoch": 0.74, "grad_norm": 2.5872363477593403, "learning_rate": 3.349812737962438e-06, "loss": 0.7648, "step": 4832 }, { "epoch": 0.74, "grad_norm": 2.772472674946281, "learning_rate": 3.34611113270069e-06, "loss": 0.7739, "step": 4833 }, { "epoch": 0.74, "grad_norm": 3.5276411112383186, "learning_rate": 3.3424111627782153e-06, "loss": 0.8324, "step": 4834 }, { "epoch": 0.74, "grad_norm": 2.738238132481366, "learning_rate": 3.3387128291043567e-06, "loss": 0.8597, "step": 4835 }, { "epoch": 0.74, "grad_norm": 2.6733033129669286, "learning_rate": 3.335016132588058e-06, "loss": 0.8107, "step": 4836 }, { "epoch": 0.74, "grad_norm": 2.3693822759565277, "learning_rate": 3.331321074137872e-06, "loss": 0.7716, "step": 4837 }, { "epoch": 0.74, "grad_norm": 3.4285933704356033, "learning_rate": 3.3276276546619324e-06, "loss": 0.7562, "step": 4838 }, { "epoch": 0.74, "grad_norm": 2.652023893212776, "learning_rate": 3.323935875067984e-06, "loss": 0.7635, "step": 4839 }, { "epoch": 0.74, "grad_norm": 2.4578290945488543, "learning_rate": 3.320245736263361e-06, "loss": 0.6576, "step": 4840 }, { "epoch": 0.74, "grad_norm": 2.7942404086760533, "learning_rate": 3.3165572391549915e-06, "loss": 0.838, "step": 4841 }, { "epoch": 0.74, "grad_norm": 2.8569696135870015, "learning_rate": 3.3128703846494115e-06, "loss": 0.725, "step": 4842 }, { "epoch": 0.74, "grad_norm": 2.7900629597578375, "learning_rate": 3.3091851736527434e-06, "loss": 0.7868, "step": 4843 }, { "epoch": 0.74, "grad_norm": 2.8046779017462407, "learning_rate": 3.3055016070707103e-06, "loss": 0.7461, "step": 4844 }, { "epoch": 0.74, "grad_norm": 2.8927421334982313, "learning_rate": 3.301819685808626e-06, "loss": 0.7093, "step": 4845 }, { "epoch": 0.74, "grad_norm": 2.858699181190039, "learning_rate": 3.2981394107714025e-06, "loss": 0.8473, "step": 4846 }, { "epoch": 0.74, "grad_norm": 2.3621659698589026, "learning_rate": 3.2944607828635554e-06, "loss": 0.7192, "step": 4847 }, { "epoch": 0.74, "grad_norm": 2.3742739290029724, "learning_rate": 3.2907838029891835e-06, "loss": 0.7438, "step": 4848 }, { "epoch": 0.74, "grad_norm": 2.606599052695424, "learning_rate": 3.287108472051982e-06, "loss": 0.709, "step": 4849 }, { "epoch": 0.74, "grad_norm": 2.6729760499150745, "learning_rate": 3.2834347909552524e-06, "loss": 0.794, "step": 4850 }, { "epoch": 0.74, "grad_norm": 2.90045193616181, "learning_rate": 3.2797627606018766e-06, "loss": 0.7958, "step": 4851 }, { "epoch": 0.74, "grad_norm": 2.6004243439972416, "learning_rate": 3.2760923818943356e-06, "loss": 0.703, "step": 4852 }, { "epoch": 0.74, "grad_norm": 2.754123503758659, "learning_rate": 3.272423655734711e-06, "loss": 0.7644, "step": 4853 }, { "epoch": 0.74, "grad_norm": 2.442792399877945, "learning_rate": 3.2687565830246704e-06, "loss": 0.8134, "step": 4854 }, { "epoch": 0.74, "grad_norm": 2.7555431485595028, "learning_rate": 3.265091164665477e-06, "loss": 0.7632, "step": 4855 }, { "epoch": 0.74, "grad_norm": 2.4544350325423854, "learning_rate": 3.261427401557987e-06, "loss": 0.7764, "step": 4856 }, { "epoch": 0.74, "grad_norm": 2.9536442126066604, "learning_rate": 3.2577652946026483e-06, "loss": 0.8453, "step": 4857 }, { "epoch": 0.74, "grad_norm": 2.9074987910434906, "learning_rate": 3.2541048446995104e-06, "loss": 0.7189, "step": 4858 }, { "epoch": 0.74, "grad_norm": 3.387376823817222, "learning_rate": 3.2504460527482062e-06, "loss": 0.8473, "step": 4859 }, { "epoch": 0.74, "grad_norm": 2.630468030984959, "learning_rate": 3.246788919647961e-06, "loss": 0.7018, "step": 4860 }, { "epoch": 0.74, "grad_norm": 2.673732710500758, "learning_rate": 3.2431334462976007e-06, "loss": 0.7881, "step": 4861 }, { "epoch": 0.74, "grad_norm": 2.4794806478398836, "learning_rate": 3.239479633595536e-06, "loss": 0.6952, "step": 4862 }, { "epoch": 0.74, "grad_norm": 2.7444790121034806, "learning_rate": 3.2358274824397685e-06, "loss": 0.7908, "step": 4863 }, { "epoch": 0.74, "grad_norm": 2.7718138842390156, "learning_rate": 3.232176993727901e-06, "loss": 0.7955, "step": 4864 }, { "epoch": 0.74, "grad_norm": 2.496774754790833, "learning_rate": 3.2285281683571178e-06, "loss": 0.7177, "step": 4865 }, { "epoch": 0.74, "grad_norm": 2.5301033397244064, "learning_rate": 3.2248810072241974e-06, "loss": 0.8109, "step": 4866 }, { "epoch": 0.74, "grad_norm": 2.926696010840226, "learning_rate": 3.22123551122551e-06, "loss": 0.7954, "step": 4867 }, { "epoch": 0.75, "grad_norm": 2.8720728648076888, "learning_rate": 3.2175916812570128e-06, "loss": 0.8188, "step": 4868 }, { "epoch": 0.75, "grad_norm": 2.7452597000369656, "learning_rate": 3.2139495182142656e-06, "loss": 0.7985, "step": 4869 }, { "epoch": 0.75, "grad_norm": 2.622907404479844, "learning_rate": 3.2103090229924028e-06, "loss": 0.8043, "step": 4870 }, { "epoch": 0.75, "grad_norm": 2.4188680852947604, "learning_rate": 3.206670196486156e-06, "loss": 0.6735, "step": 4871 }, { "epoch": 0.75, "grad_norm": 2.4322841023452044, "learning_rate": 3.203033039589851e-06, "loss": 0.6663, "step": 4872 }, { "epoch": 0.75, "grad_norm": 2.508207235025389, "learning_rate": 3.1993975531973986e-06, "loss": 0.7658, "step": 4873 }, { "epoch": 0.75, "grad_norm": 2.7555177905513784, "learning_rate": 3.1957637382022934e-06, "loss": 0.7667, "step": 4874 }, { "epoch": 0.75, "grad_norm": 2.5579291429948676, "learning_rate": 3.1921315954976317e-06, "loss": 0.695, "step": 4875 }, { "epoch": 0.75, "grad_norm": 2.5855212329014883, "learning_rate": 3.1885011259760913e-06, "loss": 0.8272, "step": 4876 }, { "epoch": 0.75, "grad_norm": 2.6520622459619694, "learning_rate": 3.1848723305299377e-06, "loss": 0.7435, "step": 4877 }, { "epoch": 0.75, "grad_norm": 2.6051923472077285, "learning_rate": 3.1812452100510283e-06, "loss": 0.769, "step": 4878 }, { "epoch": 0.75, "grad_norm": 2.7947194109042437, "learning_rate": 3.177619765430804e-06, "loss": 0.8326, "step": 4879 }, { "epoch": 0.75, "grad_norm": 2.802329035693689, "learning_rate": 3.1739959975603028e-06, "loss": 0.832, "step": 4880 }, { "epoch": 0.75, "grad_norm": 2.7431713512017355, "learning_rate": 3.1703739073301443e-06, "loss": 0.8379, "step": 4881 }, { "epoch": 0.75, "grad_norm": 2.653079167074889, "learning_rate": 3.166753495630531e-06, "loss": 0.8242, "step": 4882 }, { "epoch": 0.75, "grad_norm": 2.801604761107011, "learning_rate": 3.1631347633512665e-06, "loss": 0.7701, "step": 4883 }, { "epoch": 0.75, "grad_norm": 2.4733006429141358, "learning_rate": 3.1595177113817298e-06, "loss": 0.7112, "step": 4884 }, { "epoch": 0.75, "grad_norm": 3.1772595038733957, "learning_rate": 3.1559023406108892e-06, "loss": 0.7305, "step": 4885 }, { "epoch": 0.75, "grad_norm": 3.225116065554904, "learning_rate": 3.1522886519273053e-06, "loss": 0.8426, "step": 4886 }, { "epoch": 0.75, "grad_norm": 3.0385149713556774, "learning_rate": 3.14867664621912e-06, "loss": 0.7618, "step": 4887 }, { "epoch": 0.75, "grad_norm": 2.662748289735663, "learning_rate": 3.145066324374062e-06, "loss": 0.7236, "step": 4888 }, { "epoch": 0.75, "grad_norm": 3.0617462390536136, "learning_rate": 3.141457687279448e-06, "loss": 0.6848, "step": 4889 }, { "epoch": 0.75, "grad_norm": 2.49603952714078, "learning_rate": 3.1378507358221765e-06, "loss": 0.7738, "step": 4890 }, { "epoch": 0.75, "grad_norm": 2.996649925386801, "learning_rate": 3.1342454708887404e-06, "loss": 0.8548, "step": 4891 }, { "epoch": 0.75, "grad_norm": 2.936472847178242, "learning_rate": 3.1306418933652105e-06, "loss": 0.7815, "step": 4892 }, { "epoch": 0.75, "grad_norm": 2.7740070586751817, "learning_rate": 3.127040004137242e-06, "loss": 0.8209, "step": 4893 }, { "epoch": 0.75, "grad_norm": 2.6643704083806368, "learning_rate": 3.1234398040900836e-06, "loss": 0.7166, "step": 4894 }, { "epoch": 0.75, "grad_norm": 2.4987987550750006, "learning_rate": 3.119841294108562e-06, "loss": 0.6859, "step": 4895 }, { "epoch": 0.75, "grad_norm": 2.551914873986552, "learning_rate": 3.116244475077086e-06, "loss": 0.7637, "step": 4896 }, { "epoch": 0.75, "grad_norm": 2.574786283194233, "learning_rate": 3.112649347879658e-06, "loss": 0.7759, "step": 4897 }, { "epoch": 0.75, "grad_norm": 2.5550201444355434, "learning_rate": 3.1090559133998576e-06, "loss": 0.8019, "step": 4898 }, { "epoch": 0.75, "grad_norm": 4.54132320674916, "learning_rate": 3.10546417252085e-06, "loss": 0.8308, "step": 4899 }, { "epoch": 0.75, "grad_norm": 2.981833030314064, "learning_rate": 3.1018741261253836e-06, "loss": 0.8591, "step": 4900 }, { "epoch": 0.75, "grad_norm": 2.7911752802943677, "learning_rate": 3.0982857750957895e-06, "loss": 0.8839, "step": 4901 }, { "epoch": 0.75, "grad_norm": 2.7415236410740995, "learning_rate": 3.0946991203139896e-06, "loss": 0.7958, "step": 4902 }, { "epoch": 0.75, "grad_norm": 2.4625353307618836, "learning_rate": 3.0911141626614792e-06, "loss": 0.6676, "step": 4903 }, { "epoch": 0.75, "grad_norm": 2.5561719544460404, "learning_rate": 3.087530903019337e-06, "loss": 0.8083, "step": 4904 }, { "epoch": 0.75, "grad_norm": 2.8144420696732704, "learning_rate": 3.0839493422682344e-06, "loss": 0.7585, "step": 4905 }, { "epoch": 0.75, "grad_norm": 2.9425670291569945, "learning_rate": 3.0803694812884167e-06, "loss": 0.7633, "step": 4906 }, { "epoch": 0.75, "grad_norm": 3.743831645079609, "learning_rate": 3.0767913209597076e-06, "loss": 0.8813, "step": 4907 }, { "epoch": 0.75, "grad_norm": 2.7116331823136397, "learning_rate": 3.0732148621615266e-06, "loss": 0.7728, "step": 4908 }, { "epoch": 0.75, "grad_norm": 2.474561448824525, "learning_rate": 3.069640105772864e-06, "loss": 0.7739, "step": 4909 }, { "epoch": 0.75, "grad_norm": 2.4641070171397588, "learning_rate": 3.066067052672295e-06, "loss": 0.8748, "step": 4910 }, { "epoch": 0.75, "grad_norm": 2.5126521048603783, "learning_rate": 3.0624957037379733e-06, "loss": 0.8056, "step": 4911 }, { "epoch": 0.75, "grad_norm": 2.646426069683561, "learning_rate": 3.0589260598476354e-06, "loss": 0.7136, "step": 4912 }, { "epoch": 0.75, "grad_norm": 2.5569589171109284, "learning_rate": 3.0553581218786053e-06, "loss": 0.8465, "step": 4913 }, { "epoch": 0.75, "grad_norm": 2.383925636701246, "learning_rate": 3.0517918907077805e-06, "loss": 0.6981, "step": 4914 }, { "epoch": 0.75, "grad_norm": 2.6317329134965637, "learning_rate": 3.0482273672116347e-06, "loss": 0.8363, "step": 4915 }, { "epoch": 0.75, "grad_norm": 2.599309419546993, "learning_rate": 3.0446645522662356e-06, "loss": 0.8145, "step": 4916 }, { "epoch": 0.75, "grad_norm": 2.298720627729177, "learning_rate": 3.041103446747219e-06, "loss": 0.7698, "step": 4917 }, { "epoch": 0.75, "grad_norm": 2.792177265740668, "learning_rate": 3.037544051529804e-06, "loss": 0.8006, "step": 4918 }, { "epoch": 0.75, "grad_norm": 2.869378994570629, "learning_rate": 3.0339863674887938e-06, "loss": 0.6888, "step": 4919 }, { "epoch": 0.75, "grad_norm": 2.9143939203489646, "learning_rate": 3.0304303954985658e-06, "loss": 0.8653, "step": 4920 }, { "epoch": 0.75, "grad_norm": 2.68410731957122, "learning_rate": 3.026876136433078e-06, "loss": 0.8332, "step": 4921 }, { "epoch": 0.75, "grad_norm": 2.544048861240414, "learning_rate": 3.0233235911658665e-06, "loss": 0.7344, "step": 4922 }, { "epoch": 0.75, "grad_norm": 3.441477009221491, "learning_rate": 3.0197727605700457e-06, "loss": 0.8102, "step": 4923 }, { "epoch": 0.75, "grad_norm": 2.6620877456991425, "learning_rate": 3.016223645518315e-06, "loss": 0.7534, "step": 4924 }, { "epoch": 0.75, "grad_norm": 2.8841099980051874, "learning_rate": 3.012676246882945e-06, "loss": 0.8603, "step": 4925 }, { "epoch": 0.75, "grad_norm": 2.6406222549052223, "learning_rate": 3.009130565535784e-06, "loss": 0.7859, "step": 4926 }, { "epoch": 0.75, "grad_norm": 2.779876928459848, "learning_rate": 3.005586602348266e-06, "loss": 0.8433, "step": 4927 }, { "epoch": 0.75, "grad_norm": 3.1990330226074764, "learning_rate": 3.002044358191396e-06, "loss": 0.822, "step": 4928 }, { "epoch": 0.75, "grad_norm": 2.653865192531658, "learning_rate": 2.998503833935754e-06, "loss": 0.8725, "step": 4929 }, { "epoch": 0.75, "grad_norm": 2.7786270658771475, "learning_rate": 2.9949650304515098e-06, "loss": 0.8057, "step": 4930 }, { "epoch": 0.75, "grad_norm": 2.693592035006558, "learning_rate": 2.9914279486083963e-06, "loss": 0.8249, "step": 4931 }, { "epoch": 0.75, "grad_norm": 2.6704855809519694, "learning_rate": 2.9878925892757316e-06, "loss": 0.868, "step": 4932 }, { "epoch": 0.76, "grad_norm": 3.268918586765798, "learning_rate": 2.9843589533224047e-06, "loss": 0.8498, "step": 4933 }, { "epoch": 0.76, "grad_norm": 2.651243223030544, "learning_rate": 2.980827041616884e-06, "loss": 0.796, "step": 4934 }, { "epoch": 0.76, "grad_norm": 2.469076464190495, "learning_rate": 2.97729685502722e-06, "loss": 0.6783, "step": 4935 }, { "epoch": 0.76, "grad_norm": 2.6319493654941577, "learning_rate": 2.9737683944210293e-06, "loss": 0.8822, "step": 4936 }, { "epoch": 0.76, "grad_norm": 2.586829052141713, "learning_rate": 2.970241660665506e-06, "loss": 0.7955, "step": 4937 }, { "epoch": 0.76, "grad_norm": 2.6870831720388932, "learning_rate": 2.9667166546274286e-06, "loss": 0.9148, "step": 4938 }, { "epoch": 0.76, "grad_norm": 2.754533272622671, "learning_rate": 2.9631933771731414e-06, "loss": 0.8005, "step": 4939 }, { "epoch": 0.76, "grad_norm": 2.5926012760865684, "learning_rate": 2.9596718291685655e-06, "loss": 0.9103, "step": 4940 }, { "epoch": 0.76, "grad_norm": 2.62728719562524, "learning_rate": 2.9561520114792032e-06, "loss": 0.7476, "step": 4941 }, { "epoch": 0.76, "grad_norm": 2.62973819085915, "learning_rate": 2.952633924970121e-06, "loss": 0.8022, "step": 4942 }, { "epoch": 0.76, "grad_norm": 2.8037242977558896, "learning_rate": 2.949117570505975e-06, "loss": 0.8509, "step": 4943 }, { "epoch": 0.76, "grad_norm": 2.660119092568261, "learning_rate": 2.9456029489509773e-06, "loss": 0.8049, "step": 4944 }, { "epoch": 0.76, "grad_norm": 2.7454690787999785, "learning_rate": 2.942090061168925e-06, "loss": 0.8604, "step": 4945 }, { "epoch": 0.76, "grad_norm": 2.7323098990553603, "learning_rate": 2.938578908023192e-06, "loss": 0.8264, "step": 4946 }, { "epoch": 0.76, "grad_norm": 2.7730572623711573, "learning_rate": 2.9350694903767185e-06, "loss": 0.7949, "step": 4947 }, { "epoch": 0.76, "grad_norm": 2.961591688622995, "learning_rate": 2.9315618090920173e-06, "loss": 0.6742, "step": 4948 }, { "epoch": 0.76, "grad_norm": 2.650539798329177, "learning_rate": 2.9280558650311842e-06, "loss": 0.7325, "step": 4949 }, { "epoch": 0.76, "grad_norm": 2.712772938021309, "learning_rate": 2.9245516590558796e-06, "loss": 0.8108, "step": 4950 }, { "epoch": 0.76, "grad_norm": 2.613491635699674, "learning_rate": 2.9210491920273365e-06, "loss": 0.7417, "step": 4951 }, { "epoch": 0.76, "grad_norm": 2.572729005128316, "learning_rate": 2.917548464806368e-06, "loss": 0.8002, "step": 4952 }, { "epoch": 0.76, "grad_norm": 2.4466386915599383, "learning_rate": 2.914049478253349e-06, "loss": 0.766, "step": 4953 }, { "epoch": 0.76, "grad_norm": 2.615461090522433, "learning_rate": 2.910552233228239e-06, "loss": 0.8398, "step": 4954 }, { "epoch": 0.76, "grad_norm": 2.7812802922894964, "learning_rate": 2.907056730590558e-06, "loss": 0.8144, "step": 4955 }, { "epoch": 0.76, "grad_norm": 2.6422650497261895, "learning_rate": 2.903562971199405e-06, "loss": 0.7422, "step": 4956 }, { "epoch": 0.76, "grad_norm": 2.6107459832204496, "learning_rate": 2.9000709559134456e-06, "loss": 0.7618, "step": 4957 }, { "epoch": 0.76, "grad_norm": 2.537852760775243, "learning_rate": 2.8965806855909207e-06, "loss": 0.8506, "step": 4958 }, { "epoch": 0.76, "grad_norm": 3.247518579755631, "learning_rate": 2.8930921610896366e-06, "loss": 0.8605, "step": 4959 }, { "epoch": 0.76, "grad_norm": 2.649138417092869, "learning_rate": 2.8896053832669822e-06, "loss": 0.7862, "step": 4960 }, { "epoch": 0.76, "grad_norm": 2.812294876476703, "learning_rate": 2.8861203529799052e-06, "loss": 0.8908, "step": 4961 }, { "epoch": 0.76, "grad_norm": 2.4486042948844307, "learning_rate": 2.8826370710849274e-06, "loss": 0.7694, "step": 4962 }, { "epoch": 0.76, "grad_norm": 2.767682856019066, "learning_rate": 2.8791555384381466e-06, "loss": 0.8236, "step": 4963 }, { "epoch": 0.76, "grad_norm": 2.853722056482501, "learning_rate": 2.8756757558952186e-06, "loss": 0.7457, "step": 4964 }, { "epoch": 0.76, "grad_norm": 2.6353513595010014, "learning_rate": 2.8721977243113854e-06, "loss": 0.7987, "step": 4965 }, { "epoch": 0.76, "grad_norm": 2.9905809231839355, "learning_rate": 2.8687214445414434e-06, "loss": 0.7522, "step": 4966 }, { "epoch": 0.76, "grad_norm": 2.418550038242517, "learning_rate": 2.8652469174397667e-06, "loss": 0.7241, "step": 4967 }, { "epoch": 0.76, "grad_norm": 2.4665763541728247, "learning_rate": 2.8617741438602964e-06, "loss": 0.736, "step": 4968 }, { "epoch": 0.76, "grad_norm": 2.5783211822392196, "learning_rate": 2.8583031246565417e-06, "loss": 0.8074, "step": 4969 }, { "epoch": 0.76, "grad_norm": 2.7056606174723274, "learning_rate": 2.8548338606815805e-06, "loss": 0.7845, "step": 4970 }, { "epoch": 0.76, "grad_norm": 2.6595235853664403, "learning_rate": 2.8513663527880653e-06, "loss": 0.771, "step": 4971 }, { "epoch": 0.76, "grad_norm": 2.66613898311809, "learning_rate": 2.8479006018282096e-06, "loss": 0.6243, "step": 4972 }, { "epoch": 0.76, "grad_norm": 2.692923874491351, "learning_rate": 2.8444366086537943e-06, "loss": 0.8159, "step": 4973 }, { "epoch": 0.76, "grad_norm": 2.5376497677560717, "learning_rate": 2.840974374116179e-06, "loss": 0.711, "step": 4974 }, { "epoch": 0.76, "grad_norm": 2.863142652970658, "learning_rate": 2.8375138990662766e-06, "loss": 0.8243, "step": 4975 }, { "epoch": 0.76, "grad_norm": 2.488143347726353, "learning_rate": 2.8340551843545817e-06, "loss": 0.7433, "step": 4976 }, { "epoch": 0.76, "grad_norm": 2.8879241474099566, "learning_rate": 2.8305982308311453e-06, "loss": 0.8774, "step": 4977 }, { "epoch": 0.76, "grad_norm": 2.5762550206887824, "learning_rate": 2.827143039345591e-06, "loss": 0.7337, "step": 4978 }, { "epoch": 0.76, "grad_norm": 2.595931442952882, "learning_rate": 2.823689610747108e-06, "loss": 0.8634, "step": 4979 }, { "epoch": 0.76, "grad_norm": 2.3186033910243693, "learning_rate": 2.820237945884451e-06, "loss": 0.7204, "step": 4980 }, { "epoch": 0.76, "grad_norm": 2.614217693941196, "learning_rate": 2.8167880456059394e-06, "loss": 0.7471, "step": 4981 }, { "epoch": 0.76, "grad_norm": 2.4858416594026043, "learning_rate": 2.8133399107594683e-06, "loss": 0.7809, "step": 4982 }, { "epoch": 0.76, "grad_norm": 2.937018679007705, "learning_rate": 2.80989354219249e-06, "loss": 0.7293, "step": 4983 }, { "epoch": 0.76, "grad_norm": 2.670884778953924, "learning_rate": 2.8064489407520225e-06, "loss": 0.764, "step": 4984 }, { "epoch": 0.76, "grad_norm": 2.5856640266782973, "learning_rate": 2.803006107284657e-06, "loss": 0.8138, "step": 4985 }, { "epoch": 0.76, "grad_norm": 3.225279699315726, "learning_rate": 2.7995650426365405e-06, "loss": 0.7173, "step": 4986 }, { "epoch": 0.76, "grad_norm": 2.611351350122502, "learning_rate": 2.7961257476533954e-06, "loss": 0.7806, "step": 4987 }, { "epoch": 0.76, "grad_norm": 2.671165795967523, "learning_rate": 2.792688223180502e-06, "loss": 0.8442, "step": 4988 }, { "epoch": 0.76, "grad_norm": 3.221238185390751, "learning_rate": 2.7892524700627053e-06, "loss": 0.8327, "step": 4989 }, { "epoch": 0.76, "grad_norm": 2.7434210137367905, "learning_rate": 2.7858184891444197e-06, "loss": 0.7641, "step": 4990 }, { "epoch": 0.76, "grad_norm": 2.642891673485018, "learning_rate": 2.7823862812696203e-06, "loss": 0.7117, "step": 4991 }, { "epoch": 0.76, "grad_norm": 2.713233880054568, "learning_rate": 2.7789558472818435e-06, "loss": 0.7222, "step": 4992 }, { "epoch": 0.76, "grad_norm": 2.7191943051310097, "learning_rate": 2.775527188024201e-06, "loss": 0.8237, "step": 4993 }, { "epoch": 0.76, "grad_norm": 2.9476597995929237, "learning_rate": 2.772100304339355e-06, "loss": 0.8084, "step": 4994 }, { "epoch": 0.76, "grad_norm": 2.9476214832791774, "learning_rate": 2.7686751970695427e-06, "loss": 0.7891, "step": 4995 }, { "epoch": 0.76, "grad_norm": 3.0800081298559814, "learning_rate": 2.7652518670565577e-06, "loss": 0.8109, "step": 4996 }, { "epoch": 0.76, "grad_norm": 2.455886649514224, "learning_rate": 2.7618303151417534e-06, "loss": 0.6759, "step": 4997 }, { "epoch": 0.77, "grad_norm": 2.53600811818245, "learning_rate": 2.758410542166059e-06, "loss": 0.8115, "step": 4998 }, { "epoch": 0.77, "grad_norm": 2.819024745363265, "learning_rate": 2.754992548969956e-06, "loss": 0.8393, "step": 4999 }, { "epoch": 0.77, "grad_norm": 2.6405712089148254, "learning_rate": 2.75157633639349e-06, "loss": 0.7871, "step": 5000 }, { "epoch": 0.77, "grad_norm": 2.689984644938119, "learning_rate": 2.748161905276271e-06, "loss": 0.7925, "step": 5001 }, { "epoch": 0.77, "grad_norm": 2.5912458575941604, "learning_rate": 2.7447492564574708e-06, "loss": 0.8126, "step": 5002 }, { "epoch": 0.77, "grad_norm": 2.922850248609202, "learning_rate": 2.7413383907758183e-06, "loss": 0.8764, "step": 5003 }, { "epoch": 0.77, "grad_norm": 2.9523305387496404, "learning_rate": 2.737929309069616e-06, "loss": 0.7544, "step": 5004 }, { "epoch": 0.77, "grad_norm": 2.7388750693266837, "learning_rate": 2.7345220121767136e-06, "loss": 0.7313, "step": 5005 }, { "epoch": 0.77, "grad_norm": 2.5705254543989833, "learning_rate": 2.7311165009345362e-06, "loss": 0.6219, "step": 5006 }, { "epoch": 0.77, "grad_norm": 2.6715192568830886, "learning_rate": 2.7277127761800592e-06, "loss": 0.8486, "step": 5007 }, { "epoch": 0.77, "grad_norm": 2.596380510041663, "learning_rate": 2.7243108387498207e-06, "loss": 0.7227, "step": 5008 }, { "epoch": 0.77, "grad_norm": 2.586844529728933, "learning_rate": 2.7209106894799253e-06, "loss": 0.8468, "step": 5009 }, { "epoch": 0.77, "grad_norm": 11.099774070134371, "learning_rate": 2.7175123292060335e-06, "loss": 0.8458, "step": 5010 }, { "epoch": 0.77, "grad_norm": 2.568013495598628, "learning_rate": 2.714115758763366e-06, "loss": 0.7795, "step": 5011 }, { "epoch": 0.77, "grad_norm": 2.5389843195512642, "learning_rate": 2.710720978986705e-06, "loss": 0.8222, "step": 5012 }, { "epoch": 0.77, "grad_norm": 2.4481461619079146, "learning_rate": 2.7073279907103913e-06, "loss": 0.6732, "step": 5013 }, { "epoch": 0.77, "grad_norm": 2.572469747307365, "learning_rate": 2.703936794768325e-06, "loss": 0.7647, "step": 5014 }, { "epoch": 0.77, "grad_norm": 2.510351138783795, "learning_rate": 2.7005473919939706e-06, "loss": 0.7419, "step": 5015 }, { "epoch": 0.77, "grad_norm": 2.535945112086226, "learning_rate": 2.6971597832203434e-06, "loss": 0.7646, "step": 5016 }, { "epoch": 0.77, "grad_norm": 2.7867749494925027, "learning_rate": 2.693773969280029e-06, "loss": 0.6355, "step": 5017 }, { "epoch": 0.77, "grad_norm": 2.788432665774386, "learning_rate": 2.6903899510051624e-06, "loss": 0.913, "step": 5018 }, { "epoch": 0.77, "grad_norm": 2.4875282017699964, "learning_rate": 2.6870077292274366e-06, "loss": 0.6947, "step": 5019 }, { "epoch": 0.77, "grad_norm": 2.75155649877325, "learning_rate": 2.6836273047781137e-06, "loss": 0.7729, "step": 5020 }, { "epoch": 0.77, "grad_norm": 2.7143198356277995, "learning_rate": 2.6802486784880044e-06, "loss": 0.8094, "step": 5021 }, { "epoch": 0.77, "grad_norm": 2.6107155069240435, "learning_rate": 2.676871851187479e-06, "loss": 0.8053, "step": 5022 }, { "epoch": 0.77, "grad_norm": 2.9969143394969984, "learning_rate": 2.6734968237064686e-06, "loss": 0.806, "step": 5023 }, { "epoch": 0.77, "grad_norm": 2.752371634653211, "learning_rate": 2.6701235968744587e-06, "loss": 0.6662, "step": 5024 }, { "epoch": 0.77, "grad_norm": 2.552224215391848, "learning_rate": 2.6667521715204914e-06, "loss": 0.7168, "step": 5025 }, { "epoch": 0.77, "grad_norm": 2.620940328135656, "learning_rate": 2.6633825484731746e-06, "loss": 0.8322, "step": 5026 }, { "epoch": 0.77, "grad_norm": 5.580152864684537, "learning_rate": 2.6600147285606625e-06, "loss": 0.8152, "step": 5027 }, { "epoch": 0.77, "grad_norm": 2.3639360061856247, "learning_rate": 2.6566487126106745e-06, "loss": 0.6962, "step": 5028 }, { "epoch": 0.77, "grad_norm": 2.406455472298094, "learning_rate": 2.6532845014504814e-06, "loss": 0.7394, "step": 5029 }, { "epoch": 0.77, "grad_norm": 2.6169693952537822, "learning_rate": 2.6499220959069085e-06, "loss": 0.7742, "step": 5030 }, { "epoch": 0.77, "grad_norm": 2.4927676361698934, "learning_rate": 2.6465614968063456e-06, "loss": 0.781, "step": 5031 }, { "epoch": 0.77, "grad_norm": 2.62965426461676, "learning_rate": 2.6432027049747333e-06, "loss": 0.8843, "step": 5032 }, { "epoch": 0.77, "grad_norm": 2.7037687891142763, "learning_rate": 2.639845721237566e-06, "loss": 0.8107, "step": 5033 }, { "epoch": 0.77, "grad_norm": 2.643649919678502, "learning_rate": 2.6364905464198987e-06, "loss": 0.7893, "step": 5034 }, { "epoch": 0.77, "grad_norm": 2.691892689329616, "learning_rate": 2.6331371813463356e-06, "loss": 0.8265, "step": 5035 }, { "epoch": 0.77, "grad_norm": 2.6975452736113454, "learning_rate": 2.6297856268410406e-06, "loss": 0.81, "step": 5036 }, { "epoch": 0.77, "grad_norm": 2.4158822610707325, "learning_rate": 2.626435883727735e-06, "loss": 0.6869, "step": 5037 }, { "epoch": 0.77, "grad_norm": 2.4939839844165905, "learning_rate": 2.623087952829688e-06, "loss": 0.8316, "step": 5038 }, { "epoch": 0.77, "grad_norm": 2.776427207607364, "learning_rate": 2.619741834969731e-06, "loss": 0.7877, "step": 5039 }, { "epoch": 0.77, "grad_norm": 2.3779010383973707, "learning_rate": 2.616397530970244e-06, "loss": 0.6899, "step": 5040 }, { "epoch": 0.77, "grad_norm": 2.6083802441754464, "learning_rate": 2.6130550416531597e-06, "loss": 0.7778, "step": 5041 }, { "epoch": 0.77, "grad_norm": 2.56450784477195, "learning_rate": 2.609714367839975e-06, "loss": 0.7963, "step": 5042 }, { "epoch": 0.77, "grad_norm": 2.5459301905849654, "learning_rate": 2.606375510351731e-06, "loss": 0.7842, "step": 5043 }, { "epoch": 0.77, "grad_norm": 2.5217148421897546, "learning_rate": 2.6030384700090238e-06, "loss": 0.7401, "step": 5044 }, { "epoch": 0.77, "grad_norm": 2.45823283836036, "learning_rate": 2.599703247632005e-06, "loss": 0.7311, "step": 5045 }, { "epoch": 0.77, "grad_norm": 2.9579669428110598, "learning_rate": 2.596369844040378e-06, "loss": 0.7747, "step": 5046 }, { "epoch": 0.77, "grad_norm": 2.5750011906269923, "learning_rate": 2.5930382600533998e-06, "loss": 0.7482, "step": 5047 }, { "epoch": 0.77, "grad_norm": 2.7888629756635206, "learning_rate": 2.5897084964898835e-06, "loss": 0.7846, "step": 5048 }, { "epoch": 0.77, "grad_norm": 2.5721294503502157, "learning_rate": 2.586380554168185e-06, "loss": 0.6925, "step": 5049 }, { "epoch": 0.77, "grad_norm": 2.7618620787962493, "learning_rate": 2.583054433906228e-06, "loss": 0.7054, "step": 5050 }, { "epoch": 0.77, "grad_norm": 2.6276942856050938, "learning_rate": 2.5797301365214742e-06, "loss": 0.7645, "step": 5051 }, { "epoch": 0.77, "grad_norm": 2.616392198716273, "learning_rate": 2.576407662830942e-06, "loss": 0.7362, "step": 5052 }, { "epoch": 0.77, "grad_norm": 2.711256026542573, "learning_rate": 2.5730870136512055e-06, "loss": 0.7994, "step": 5053 }, { "epoch": 0.77, "grad_norm": 2.6991497732690743, "learning_rate": 2.5697681897983862e-06, "loss": 0.8072, "step": 5054 }, { "epoch": 0.77, "grad_norm": 2.4775791838568586, "learning_rate": 2.566451192088156e-06, "loss": 0.7789, "step": 5055 }, { "epoch": 0.77, "grad_norm": 8.388324935782896, "learning_rate": 2.5631360213357425e-06, "loss": 0.8261, "step": 5056 }, { "epoch": 0.77, "grad_norm": 2.514421673855606, "learning_rate": 2.5598226783559198e-06, "loss": 0.7355, "step": 5057 }, { "epoch": 0.77, "grad_norm": 2.536006040702953, "learning_rate": 2.5565111639630125e-06, "loss": 0.6696, "step": 5058 }, { "epoch": 0.77, "grad_norm": 2.7285987787744275, "learning_rate": 2.5532014789709027e-06, "loss": 0.8219, "step": 5059 }, { "epoch": 0.77, "grad_norm": 2.476472443285063, "learning_rate": 2.549893624193014e-06, "loss": 0.7924, "step": 5060 }, { "epoch": 0.77, "grad_norm": 2.662355526288266, "learning_rate": 2.5465876004423285e-06, "loss": 0.7893, "step": 5061 }, { "epoch": 0.77, "grad_norm": 2.507656262345981, "learning_rate": 2.543283408531373e-06, "loss": 0.7732, "step": 5062 }, { "epoch": 0.77, "grad_norm": 2.8257941998664964, "learning_rate": 2.53998104927222e-06, "loss": 0.7729, "step": 5063 }, { "epoch": 0.78, "grad_norm": 2.7337037395372334, "learning_rate": 2.5366805234765047e-06, "loss": 0.7656, "step": 5064 }, { "epoch": 0.78, "grad_norm": 2.5500710978463883, "learning_rate": 2.5333818319554002e-06, "loss": 0.6465, "step": 5065 }, { "epoch": 0.78, "grad_norm": 2.7708922146707167, "learning_rate": 2.530084975519629e-06, "loss": 0.8558, "step": 5066 }, { "epoch": 0.78, "grad_norm": 2.7554935025214697, "learning_rate": 2.5267899549794728e-06, "loss": 0.754, "step": 5067 }, { "epoch": 0.78, "grad_norm": 2.4565508163642993, "learning_rate": 2.523496771144751e-06, "loss": 0.8833, "step": 5068 }, { "epoch": 0.78, "grad_norm": 2.5094279415765417, "learning_rate": 2.5202054248248362e-06, "loss": 0.7924, "step": 5069 }, { "epoch": 0.78, "grad_norm": 3.172596205277227, "learning_rate": 2.5169159168286503e-06, "loss": 0.7781, "step": 5070 }, { "epoch": 0.78, "grad_norm": 2.780052244901856, "learning_rate": 2.513628247964658e-06, "loss": 0.8596, "step": 5071 }, { "epoch": 0.78, "grad_norm": 2.6389858730839517, "learning_rate": 2.510342419040881e-06, "loss": 0.7318, "step": 5072 }, { "epoch": 0.78, "grad_norm": 3.0811186388492193, "learning_rate": 2.5070584308648828e-06, "loss": 0.8587, "step": 5073 }, { "epoch": 0.78, "grad_norm": 2.53080728953482, "learning_rate": 2.503776284243772e-06, "loss": 0.7536, "step": 5074 }, { "epoch": 0.78, "grad_norm": 2.7781051409831026, "learning_rate": 2.5004959799842133e-06, "loss": 0.8435, "step": 5075 }, { "epoch": 0.78, "grad_norm": 2.7029578113303256, "learning_rate": 2.497217518892412e-06, "loss": 0.7928, "step": 5076 }, { "epoch": 0.78, "grad_norm": 2.7198966214625666, "learning_rate": 2.493940901774118e-06, "loss": 0.7304, "step": 5077 }, { "epoch": 0.78, "grad_norm": 3.482025583931032, "learning_rate": 2.490666129434638e-06, "loss": 0.828, "step": 5078 }, { "epoch": 0.78, "grad_norm": 2.46772119696312, "learning_rate": 2.4873932026788162e-06, "loss": 0.7316, "step": 5079 }, { "epoch": 0.78, "grad_norm": 2.5961425268325375, "learning_rate": 2.4841221223110467e-06, "loss": 0.8223, "step": 5080 }, { "epoch": 0.78, "grad_norm": 2.5679783685419544, "learning_rate": 2.4808528891352677e-06, "loss": 0.7975, "step": 5081 }, { "epoch": 0.78, "grad_norm": 2.7506846436308914, "learning_rate": 2.4775855039549647e-06, "loss": 0.7494, "step": 5082 }, { "epoch": 0.78, "grad_norm": 2.5964912817122605, "learning_rate": 2.4743199675731722e-06, "loss": 0.7492, "step": 5083 }, { "epoch": 0.78, "grad_norm": 2.8934879578568884, "learning_rate": 2.4710562807924664e-06, "loss": 0.7708, "step": 5084 }, { "epoch": 0.78, "grad_norm": 2.6202697209531114, "learning_rate": 2.4677944444149683e-06, "loss": 0.7256, "step": 5085 }, { "epoch": 0.78, "grad_norm": 2.626432453067587, "learning_rate": 2.464534459242348e-06, "loss": 0.8259, "step": 5086 }, { "epoch": 0.78, "grad_norm": 2.521069808194475, "learning_rate": 2.4612763260758187e-06, "loss": 0.696, "step": 5087 }, { "epoch": 0.78, "grad_norm": 2.64835206059012, "learning_rate": 2.458020045716134e-06, "loss": 0.7756, "step": 5088 }, { "epoch": 0.78, "grad_norm": 2.3416846206287163, "learning_rate": 2.4547656189636014e-06, "loss": 0.7253, "step": 5089 }, { "epoch": 0.78, "grad_norm": 2.942055576016125, "learning_rate": 2.451513046618067e-06, "loss": 0.7859, "step": 5090 }, { "epoch": 0.78, "grad_norm": 2.8389802865849383, "learning_rate": 2.44826232947892e-06, "loss": 0.8852, "step": 5091 }, { "epoch": 0.78, "grad_norm": 2.511118745358438, "learning_rate": 2.4450134683450957e-06, "loss": 0.729, "step": 5092 }, { "epoch": 0.78, "grad_norm": 3.3110347546272845, "learning_rate": 2.4417664640150695e-06, "loss": 0.7976, "step": 5093 }, { "epoch": 0.78, "grad_norm": 2.440522966930613, "learning_rate": 2.4385213172868716e-06, "loss": 0.7789, "step": 5094 }, { "epoch": 0.78, "grad_norm": 2.6106319712037207, "learning_rate": 2.4352780289580647e-06, "loss": 0.8299, "step": 5095 }, { "epoch": 0.78, "grad_norm": 2.5840976790034924, "learning_rate": 2.4320365998257543e-06, "loss": 0.7839, "step": 5096 }, { "epoch": 0.78, "grad_norm": 2.68223277349005, "learning_rate": 2.4287970306865994e-06, "loss": 0.7534, "step": 5097 }, { "epoch": 0.78, "grad_norm": 2.4092525951805706, "learning_rate": 2.4255593223367923e-06, "loss": 0.7492, "step": 5098 }, { "epoch": 0.78, "grad_norm": 2.6820676394906435, "learning_rate": 2.4223234755720672e-06, "loss": 0.7327, "step": 5099 }, { "epoch": 0.78, "grad_norm": 2.567354858149662, "learning_rate": 2.4190894911877105e-06, "loss": 0.7733, "step": 5100 }, { "epoch": 0.78, "grad_norm": 2.647653908124238, "learning_rate": 2.4158573699785427e-06, "loss": 0.7113, "step": 5101 }, { "epoch": 0.78, "grad_norm": 2.6788491642529397, "learning_rate": 2.412627112738928e-06, "loss": 0.7993, "step": 5102 }, { "epoch": 0.78, "grad_norm": 2.9280387750202412, "learning_rate": 2.4093987202627735e-06, "loss": 0.8553, "step": 5103 }, { "epoch": 0.78, "grad_norm": 2.581053853352887, "learning_rate": 2.4061721933435246e-06, "loss": 0.8247, "step": 5104 }, { "epoch": 0.78, "grad_norm": 2.580506365930907, "learning_rate": 2.4029475327741758e-06, "loss": 0.8352, "step": 5105 }, { "epoch": 0.78, "grad_norm": 2.711539323105799, "learning_rate": 2.3997247393472557e-06, "loss": 0.7786, "step": 5106 }, { "epoch": 0.78, "grad_norm": 3.6544888378906997, "learning_rate": 2.3965038138548346e-06, "loss": 0.8774, "step": 5107 }, { "epoch": 0.78, "grad_norm": 2.781610147223123, "learning_rate": 2.3932847570885307e-06, "loss": 0.7689, "step": 5108 }, { "epoch": 0.78, "grad_norm": 2.838519577882593, "learning_rate": 2.390067569839496e-06, "loss": 0.7351, "step": 5109 }, { "epoch": 0.78, "grad_norm": 2.605314160377416, "learning_rate": 2.3868522528984207e-06, "loss": 0.7761, "step": 5110 }, { "epoch": 0.78, "grad_norm": 2.7188592167400154, "learning_rate": 2.383638807055545e-06, "loss": 0.8472, "step": 5111 }, { "epoch": 0.78, "grad_norm": 2.5482203378844366, "learning_rate": 2.380427233100643e-06, "loss": 0.7605, "step": 5112 }, { "epoch": 0.78, "grad_norm": 2.704449992334734, "learning_rate": 2.3772175318230272e-06, "loss": 0.8089, "step": 5113 }, { "epoch": 0.78, "grad_norm": 2.8402849358160167, "learning_rate": 2.374009704011554e-06, "loss": 0.6916, "step": 5114 }, { "epoch": 0.78, "grad_norm": 2.864028803777469, "learning_rate": 2.3708037504546124e-06, "loss": 0.799, "step": 5115 }, { "epoch": 0.78, "grad_norm": 2.501326567246752, "learning_rate": 2.367599671940144e-06, "loss": 0.6988, "step": 5116 }, { "epoch": 0.78, "grad_norm": 3.2004108035764625, "learning_rate": 2.364397469255617e-06, "loss": 0.8027, "step": 5117 }, { "epoch": 0.78, "grad_norm": 2.6490640456468046, "learning_rate": 2.3611971431880407e-06, "loss": 0.806, "step": 5118 }, { "epoch": 0.78, "grad_norm": 2.5612708686624686, "learning_rate": 2.357998694523972e-06, "loss": 0.6694, "step": 5119 }, { "epoch": 0.78, "grad_norm": 3.0401653824639516, "learning_rate": 2.3548021240494955e-06, "loss": 0.7947, "step": 5120 }, { "epoch": 0.78, "grad_norm": 2.8307884578484686, "learning_rate": 2.3516074325502368e-06, "loss": 0.8022, "step": 5121 }, { "epoch": 0.78, "grad_norm": 2.72977536780408, "learning_rate": 2.348414620811367e-06, "loss": 0.7399, "step": 5122 }, { "epoch": 0.78, "grad_norm": 2.4748006126897417, "learning_rate": 2.345223689617586e-06, "loss": 0.6904, "step": 5123 }, { "epoch": 0.78, "grad_norm": 2.4064619793053934, "learning_rate": 2.342034639753137e-06, "loss": 0.7289, "step": 5124 }, { "epoch": 0.78, "grad_norm": 2.9327774960950537, "learning_rate": 2.3388474720017963e-06, "loss": 0.8078, "step": 5125 }, { "epoch": 0.78, "grad_norm": 2.4273391870497125, "learning_rate": 2.33566218714688e-06, "loss": 0.8186, "step": 5126 }, { "epoch": 0.78, "grad_norm": 3.339466964529913, "learning_rate": 2.3324787859712462e-06, "loss": 0.8604, "step": 5127 }, { "epoch": 0.78, "grad_norm": 2.7521085637109732, "learning_rate": 2.3292972692572833e-06, "loss": 0.7874, "step": 5128 }, { "epoch": 0.79, "grad_norm": 2.7279640075919307, "learning_rate": 2.3261176377869165e-06, "loss": 0.7593, "step": 5129 }, { "epoch": 0.79, "grad_norm": 2.598691379497955, "learning_rate": 2.3229398923416136e-06, "loss": 0.7745, "step": 5130 }, { "epoch": 0.79, "grad_norm": 2.519874664868604, "learning_rate": 2.319764033702375e-06, "loss": 0.8348, "step": 5131 }, { "epoch": 0.79, "grad_norm": 2.601307397164149, "learning_rate": 2.316590062649734e-06, "loss": 0.7321, "step": 5132 }, { "epoch": 0.79, "grad_norm": 2.4969779020558853, "learning_rate": 2.313417979963768e-06, "loss": 0.728, "step": 5133 }, { "epoch": 0.79, "grad_norm": 2.650113640353963, "learning_rate": 2.310247786424086e-06, "loss": 0.7826, "step": 5134 }, { "epoch": 0.79, "grad_norm": 2.5214125575947963, "learning_rate": 2.3070794828098285e-06, "loss": 0.7338, "step": 5135 }, { "epoch": 0.79, "grad_norm": 2.9428606849207837, "learning_rate": 2.3039130698996802e-06, "loss": 0.8268, "step": 5136 }, { "epoch": 0.79, "grad_norm": 2.6185692051050182, "learning_rate": 2.3007485484718505e-06, "loss": 0.6381, "step": 5137 }, { "epoch": 0.79, "grad_norm": 2.6834657323433992, "learning_rate": 2.297585919304097e-06, "loss": 0.7596, "step": 5138 }, { "epoch": 0.79, "grad_norm": 2.781398509030959, "learning_rate": 2.294425183173703e-06, "loss": 0.7946, "step": 5139 }, { "epoch": 0.79, "grad_norm": 2.561953880258535, "learning_rate": 2.2912663408574843e-06, "loss": 0.787, "step": 5140 }, { "epoch": 0.79, "grad_norm": 2.618970065482725, "learning_rate": 2.288109393131802e-06, "loss": 0.7402, "step": 5141 }, { "epoch": 0.79, "grad_norm": 2.428394606083279, "learning_rate": 2.2849543407725403e-06, "loss": 0.6512, "step": 5142 }, { "epoch": 0.79, "grad_norm": 2.8219441478736917, "learning_rate": 2.2818011845551293e-06, "loss": 0.8186, "step": 5143 }, { "epoch": 0.79, "grad_norm": 2.5187641567693984, "learning_rate": 2.27864992525452e-06, "loss": 0.7891, "step": 5144 }, { "epoch": 0.79, "grad_norm": 2.59796954673457, "learning_rate": 2.2755005636452067e-06, "loss": 0.7758, "step": 5145 }, { "epoch": 0.79, "grad_norm": 2.652727950464101, "learning_rate": 2.2723531005012133e-06, "loss": 0.8253, "step": 5146 }, { "epoch": 0.79, "grad_norm": 2.7847114123727943, "learning_rate": 2.2692075365960974e-06, "loss": 0.8096, "step": 5147 }, { "epoch": 0.79, "grad_norm": 2.692857244476947, "learning_rate": 2.2660638727029484e-06, "loss": 0.8111, "step": 5148 }, { "epoch": 0.79, "grad_norm": 2.879953323799714, "learning_rate": 2.2629221095943952e-06, "loss": 0.7733, "step": 5149 }, { "epoch": 0.79, "grad_norm": 2.710345885860965, "learning_rate": 2.2597822480425934e-06, "loss": 0.7381, "step": 5150 }, { "epoch": 0.79, "grad_norm": 2.532688579438755, "learning_rate": 2.2566442888192306e-06, "loss": 0.8801, "step": 5151 }, { "epoch": 0.79, "grad_norm": 2.6739871284912713, "learning_rate": 2.2535082326955325e-06, "loss": 0.7638, "step": 5152 }, { "epoch": 0.79, "grad_norm": 2.6604332292354353, "learning_rate": 2.2503740804422504e-06, "loss": 0.7703, "step": 5153 }, { "epoch": 0.79, "grad_norm": 3.1129370913861996, "learning_rate": 2.247241832829675e-06, "loss": 0.81, "step": 5154 }, { "epoch": 0.79, "grad_norm": 2.777618927551345, "learning_rate": 2.244111490627623e-06, "loss": 0.8235, "step": 5155 }, { "epoch": 0.79, "grad_norm": 2.6995224648073917, "learning_rate": 2.2409830546054456e-06, "loss": 0.6747, "step": 5156 }, { "epoch": 0.79, "grad_norm": 2.670008968578099, "learning_rate": 2.2378565255320226e-06, "loss": 0.8097, "step": 5157 }, { "epoch": 0.79, "grad_norm": 2.7697821647266103, "learning_rate": 2.2347319041757675e-06, "loss": 0.8285, "step": 5158 }, { "epoch": 0.79, "grad_norm": 2.749625518855146, "learning_rate": 2.2316091913046235e-06, "loss": 0.713, "step": 5159 }, { "epoch": 0.79, "grad_norm": 2.594875868036443, "learning_rate": 2.2284883876860707e-06, "loss": 0.7735, "step": 5160 }, { "epoch": 0.79, "grad_norm": 2.527292720733303, "learning_rate": 2.2253694940871106e-06, "loss": 0.7583, "step": 5161 }, { "epoch": 0.79, "grad_norm": 3.417019669741224, "learning_rate": 2.22225251127428e-06, "loss": 0.7943, "step": 5162 }, { "epoch": 0.79, "grad_norm": 2.642528698366732, "learning_rate": 2.219137440013649e-06, "loss": 0.8459, "step": 5163 }, { "epoch": 0.79, "grad_norm": 2.5632337289614746, "learning_rate": 2.2160242810708098e-06, "loss": 0.8122, "step": 5164 }, { "epoch": 0.79, "grad_norm": 2.5843935751636216, "learning_rate": 2.212913035210895e-06, "loss": 0.761, "step": 5165 }, { "epoch": 0.79, "grad_norm": 2.6664617878227626, "learning_rate": 2.2098037031985598e-06, "loss": 0.6748, "step": 5166 }, { "epoch": 0.79, "grad_norm": 2.5021842064630033, "learning_rate": 2.20669628579799e-06, "loss": 0.7371, "step": 5167 }, { "epoch": 0.79, "grad_norm": 2.6612014852259103, "learning_rate": 2.203590783772902e-06, "loss": 0.7361, "step": 5168 }, { "epoch": 0.79, "grad_norm": 2.562022582459715, "learning_rate": 2.2004871978865407e-06, "loss": 0.7548, "step": 5169 }, { "epoch": 0.79, "grad_norm": 2.8162698580900045, "learning_rate": 2.1973855289016767e-06, "loss": 0.8065, "step": 5170 }, { "epoch": 0.79, "grad_norm": 2.667831941143919, "learning_rate": 2.1942857775806215e-06, "loss": 0.827, "step": 5171 }, { "epoch": 0.79, "grad_norm": 2.4825779833500237, "learning_rate": 2.1911879446852016e-06, "loss": 0.7071, "step": 5172 }, { "epoch": 0.79, "grad_norm": 2.874618733898814, "learning_rate": 2.1880920309767763e-06, "loss": 0.723, "step": 5173 }, { "epoch": 0.79, "grad_norm": 2.6996800389012137, "learning_rate": 2.1849980372162393e-06, "loss": 0.7636, "step": 5174 }, { "epoch": 0.79, "grad_norm": 2.922952745301586, "learning_rate": 2.1819059641640015e-06, "loss": 0.7544, "step": 5175 }, { "epoch": 0.79, "grad_norm": 2.4509868820621143, "learning_rate": 2.178815812580015e-06, "loss": 0.8044, "step": 5176 }, { "epoch": 0.79, "grad_norm": 2.7097921123798425, "learning_rate": 2.175727583223748e-06, "loss": 0.7816, "step": 5177 }, { "epoch": 0.79, "grad_norm": 2.7726244992765974, "learning_rate": 2.1726412768541984e-06, "loss": 0.8708, "step": 5178 }, { "epoch": 0.79, "grad_norm": 2.646471864402857, "learning_rate": 2.1695568942298984e-06, "loss": 0.7968, "step": 5179 }, { "epoch": 0.79, "grad_norm": 2.7006362811759965, "learning_rate": 2.1664744361089042e-06, "loss": 0.7764, "step": 5180 }, { "epoch": 0.79, "grad_norm": 2.4489036380967026, "learning_rate": 2.1633939032487883e-06, "loss": 0.7578, "step": 5181 }, { "epoch": 0.79, "grad_norm": 2.8565073056265224, "learning_rate": 2.160315296406669e-06, "loss": 0.7507, "step": 5182 }, { "epoch": 0.79, "grad_norm": 3.033558611119768, "learning_rate": 2.1572386163391767e-06, "loss": 0.8208, "step": 5183 }, { "epoch": 0.79, "grad_norm": 2.5760594105590156, "learning_rate": 2.154163863802471e-06, "loss": 0.7048, "step": 5184 }, { "epoch": 0.79, "grad_norm": 2.572431198217354, "learning_rate": 2.1510910395522454e-06, "loss": 0.7405, "step": 5185 }, { "epoch": 0.79, "grad_norm": 2.68710261224045, "learning_rate": 2.1480201443437097e-06, "loss": 0.7985, "step": 5186 }, { "epoch": 0.79, "grad_norm": 2.7139550604337064, "learning_rate": 2.144951178931608e-06, "loss": 0.7096, "step": 5187 }, { "epoch": 0.79, "grad_norm": 2.853358959872608, "learning_rate": 2.1418841440702032e-06, "loss": 0.8113, "step": 5188 }, { "epoch": 0.79, "grad_norm": 2.6227026638339734, "learning_rate": 2.1388190405132835e-06, "loss": 0.7304, "step": 5189 }, { "epoch": 0.79, "grad_norm": 2.755493062706885, "learning_rate": 2.135755869014171e-06, "loss": 0.8311, "step": 5190 }, { "epoch": 0.79, "grad_norm": 3.007649818068204, "learning_rate": 2.1326946303257055e-06, "loss": 0.7688, "step": 5191 }, { "epoch": 0.79, "grad_norm": 3.3892465626955914, "learning_rate": 2.1296353252002535e-06, "loss": 0.8222, "step": 5192 }, { "epoch": 0.79, "grad_norm": 2.8379742097250498, "learning_rate": 2.126577954389706e-06, "loss": 0.6913, "step": 5193 }, { "epoch": 0.8, "grad_norm": 2.603497319834362, "learning_rate": 2.123522518645478e-06, "loss": 0.8435, "step": 5194 }, { "epoch": 0.8, "grad_norm": 2.653831764050139, "learning_rate": 2.1204690187185083e-06, "loss": 0.8059, "step": 5195 }, { "epoch": 0.8, "grad_norm": 2.826818041431396, "learning_rate": 2.1174174553592662e-06, "loss": 0.8316, "step": 5196 }, { "epoch": 0.8, "grad_norm": 2.7569910967979547, "learning_rate": 2.114367829317737e-06, "loss": 0.7594, "step": 5197 }, { "epoch": 0.8, "grad_norm": 2.6456699660607916, "learning_rate": 2.111320141343437e-06, "loss": 0.7005, "step": 5198 }, { "epoch": 0.8, "grad_norm": 2.6894829901775865, "learning_rate": 2.1082743921853986e-06, "loss": 0.8221, "step": 5199 }, { "epoch": 0.8, "grad_norm": 2.4653459998874787, "learning_rate": 2.1052305825921814e-06, "loss": 0.7755, "step": 5200 }, { "epoch": 0.8, "grad_norm": 2.3475070217631435, "learning_rate": 2.1021887133118724e-06, "loss": 0.7037, "step": 5201 }, { "epoch": 0.8, "grad_norm": 2.5835424412431998, "learning_rate": 2.0991487850920744e-06, "loss": 0.8099, "step": 5202 }, { "epoch": 0.8, "grad_norm": 2.529540829600717, "learning_rate": 2.0961107986799177e-06, "loss": 0.7818, "step": 5203 }, { "epoch": 0.8, "grad_norm": 2.5303647554019335, "learning_rate": 2.0930747548220544e-06, "loss": 0.8147, "step": 5204 }, { "epoch": 0.8, "grad_norm": 2.680295338295072, "learning_rate": 2.0900406542646578e-06, "loss": 0.7119, "step": 5205 }, { "epoch": 0.8, "grad_norm": 2.4917329323772948, "learning_rate": 2.0870084977534234e-06, "loss": 0.709, "step": 5206 }, { "epoch": 0.8, "grad_norm": 2.680142357862478, "learning_rate": 2.0839782860335744e-06, "loss": 0.7774, "step": 5207 }, { "epoch": 0.8, "grad_norm": 2.7202509491980726, "learning_rate": 2.0809500198498465e-06, "loss": 0.7406, "step": 5208 }, { "epoch": 0.8, "grad_norm": 2.538821119730595, "learning_rate": 2.077923699946508e-06, "loss": 0.6594, "step": 5209 }, { "epoch": 0.8, "grad_norm": 2.696771041993024, "learning_rate": 2.0748993270673413e-06, "loss": 0.6496, "step": 5210 }, { "epoch": 0.8, "grad_norm": 2.9231310393079006, "learning_rate": 2.0718769019556497e-06, "loss": 0.7194, "step": 5211 }, { "epoch": 0.8, "grad_norm": 2.4163140505938148, "learning_rate": 2.0688564253542665e-06, "loss": 0.7744, "step": 5212 }, { "epoch": 0.8, "grad_norm": 2.4284110144498947, "learning_rate": 2.065837898005536e-06, "loss": 0.7628, "step": 5213 }, { "epoch": 0.8, "grad_norm": 2.564184491525239, "learning_rate": 2.0628213206513283e-06, "loss": 0.7292, "step": 5214 }, { "epoch": 0.8, "grad_norm": 2.5657572703609413, "learning_rate": 2.059806694033033e-06, "loss": 0.7039, "step": 5215 }, { "epoch": 0.8, "grad_norm": 2.61890696078291, "learning_rate": 2.056794018891559e-06, "loss": 0.7752, "step": 5216 }, { "epoch": 0.8, "grad_norm": 2.841991160663711, "learning_rate": 2.053783295967342e-06, "loss": 0.8468, "step": 5217 }, { "epoch": 0.8, "grad_norm": 2.632280940282483, "learning_rate": 2.050774526000331e-06, "loss": 0.7935, "step": 5218 }, { "epoch": 0.8, "grad_norm": 2.7062111362509427, "learning_rate": 2.0477677097299944e-06, "loss": 0.8167, "step": 5219 }, { "epoch": 0.8, "grad_norm": 2.6616929561759517, "learning_rate": 2.0447628478953285e-06, "loss": 0.8678, "step": 5220 }, { "epoch": 0.8, "grad_norm": 2.6295559456205364, "learning_rate": 2.041759941234842e-06, "loss": 0.7249, "step": 5221 }, { "epoch": 0.8, "grad_norm": 2.854155984098713, "learning_rate": 2.0387589904865624e-06, "loss": 0.8524, "step": 5222 }, { "epoch": 0.8, "grad_norm": 2.915882148566005, "learning_rate": 2.035759996388044e-06, "loss": 0.8829, "step": 5223 }, { "epoch": 0.8, "grad_norm": 2.5580168750836223, "learning_rate": 2.0327629596763522e-06, "loss": 0.7832, "step": 5224 }, { "epoch": 0.8, "grad_norm": 2.667148793752107, "learning_rate": 2.029767881088076e-06, "loss": 0.7752, "step": 5225 }, { "epoch": 0.8, "grad_norm": 2.485601095948484, "learning_rate": 2.0267747613593216e-06, "loss": 0.7068, "step": 5226 }, { "epoch": 0.8, "grad_norm": 3.3539907143857755, "learning_rate": 2.0237836012257094e-06, "loss": 0.851, "step": 5227 }, { "epoch": 0.8, "grad_norm": 3.207716113325043, "learning_rate": 2.02079440142239e-06, "loss": 0.7128, "step": 5228 }, { "epoch": 0.8, "grad_norm": 2.6476668136581853, "learning_rate": 2.0178071626840222e-06, "loss": 0.7198, "step": 5229 }, { "epoch": 0.8, "grad_norm": 2.524164666724541, "learning_rate": 2.014821885744782e-06, "loss": 0.8026, "step": 5230 }, { "epoch": 0.8, "grad_norm": 2.433542763429434, "learning_rate": 2.0118385713383717e-06, "loss": 0.6877, "step": 5231 }, { "epoch": 0.8, "grad_norm": 2.7388708012854384, "learning_rate": 2.008857220198004e-06, "loss": 0.8663, "step": 5232 }, { "epoch": 0.8, "grad_norm": 2.546278045195248, "learning_rate": 2.0058778330564087e-06, "loss": 0.7761, "step": 5233 }, { "epoch": 0.8, "grad_norm": 2.5100886555285506, "learning_rate": 2.002900410645842e-06, "loss": 0.745, "step": 5234 }, { "epoch": 0.8, "grad_norm": 2.832589902344868, "learning_rate": 1.999924953698067e-06, "loss": 0.7755, "step": 5235 }, { "epoch": 0.8, "grad_norm": 2.7081162850341634, "learning_rate": 1.9969514629443676e-06, "loss": 0.815, "step": 5236 }, { "epoch": 0.8, "grad_norm": 2.612861915456119, "learning_rate": 1.9939799391155447e-06, "loss": 0.7633, "step": 5237 }, { "epoch": 0.8, "grad_norm": 2.968076668129109, "learning_rate": 1.9910103829419136e-06, "loss": 0.7578, "step": 5238 }, { "epoch": 0.8, "grad_norm": 2.7880471818672863, "learning_rate": 1.9880427951533123e-06, "loss": 0.7828, "step": 5239 }, { "epoch": 0.8, "grad_norm": 2.5007751028079, "learning_rate": 1.9850771764790866e-06, "loss": 0.7722, "step": 5240 }, { "epoch": 0.8, "grad_norm": 2.7926472560728772, "learning_rate": 1.982113527648103e-06, "loss": 0.7983, "step": 5241 }, { "epoch": 0.8, "grad_norm": 2.688729300603576, "learning_rate": 1.9791518493887464e-06, "loss": 0.7109, "step": 5242 }, { "epoch": 0.8, "grad_norm": 2.526455965144725, "learning_rate": 1.976192142428912e-06, "loss": 0.7415, "step": 5243 }, { "epoch": 0.8, "grad_norm": 2.361831127463669, "learning_rate": 1.9732344074960106e-06, "loss": 0.6652, "step": 5244 }, { "epoch": 0.8, "grad_norm": 2.8803546372423576, "learning_rate": 1.9702786453169753e-06, "loss": 0.7467, "step": 5245 }, { "epoch": 0.8, "grad_norm": 2.606351929515565, "learning_rate": 1.967324856618247e-06, "loss": 0.7258, "step": 5246 }, { "epoch": 0.8, "grad_norm": 2.5139030510363156, "learning_rate": 1.9643730421257836e-06, "loss": 0.714, "step": 5247 }, { "epoch": 0.8, "grad_norm": 2.561178712202148, "learning_rate": 1.961423202565059e-06, "loss": 0.6892, "step": 5248 }, { "epoch": 0.8, "grad_norm": 2.6508261197310112, "learning_rate": 1.958475338661059e-06, "loss": 0.6926, "step": 5249 }, { "epoch": 0.8, "grad_norm": 3.1385624324100405, "learning_rate": 1.9555294511382895e-06, "loss": 0.8052, "step": 5250 }, { "epoch": 0.8, "grad_norm": 2.5151851416799134, "learning_rate": 1.952585540720765e-06, "loss": 0.7427, "step": 5251 }, { "epoch": 0.8, "grad_norm": 2.7096528444167545, "learning_rate": 1.949643608132015e-06, "loss": 0.854, "step": 5252 }, { "epoch": 0.8, "grad_norm": 2.6073748284771137, "learning_rate": 1.946703654095087e-06, "loss": 0.7681, "step": 5253 }, { "epoch": 0.8, "grad_norm": 2.647365142933058, "learning_rate": 1.943765679332539e-06, "loss": 0.7355, "step": 5254 }, { "epoch": 0.8, "grad_norm": 2.554651512314461, "learning_rate": 1.9408296845664374e-06, "loss": 0.7792, "step": 5255 }, { "epoch": 0.8, "grad_norm": 2.7749366497128714, "learning_rate": 1.937895670518374e-06, "loss": 0.7521, "step": 5256 }, { "epoch": 0.8, "grad_norm": 2.6113040123040587, "learning_rate": 1.934963637909445e-06, "loss": 0.7682, "step": 5257 }, { "epoch": 0.8, "grad_norm": 2.68125144170482, "learning_rate": 1.9320335874602615e-06, "loss": 0.6771, "step": 5258 }, { "epoch": 0.8, "grad_norm": 2.79670004343362, "learning_rate": 1.929105519890948e-06, "loss": 0.7837, "step": 5259 }, { "epoch": 0.81, "grad_norm": 2.858531857582643, "learning_rate": 1.9261794359211385e-06, "loss": 0.797, "step": 5260 }, { "epoch": 0.81, "grad_norm": 2.601601436421406, "learning_rate": 1.9232553362699867e-06, "loss": 0.8745, "step": 5261 }, { "epoch": 0.81, "grad_norm": 2.7155178948988015, "learning_rate": 1.920333221656152e-06, "loss": 0.7517, "step": 5262 }, { "epoch": 0.81, "grad_norm": 2.71136245899951, "learning_rate": 1.9174130927978073e-06, "loss": 0.8722, "step": 5263 }, { "epoch": 0.81, "grad_norm": 2.6266359171360882, "learning_rate": 1.914494950412642e-06, "loss": 0.769, "step": 5264 }, { "epoch": 0.81, "grad_norm": 2.676658527106433, "learning_rate": 1.9115787952178513e-06, "loss": 0.8158, "step": 5265 }, { "epoch": 0.81, "grad_norm": 2.7060334105432813, "learning_rate": 1.9086646279301414e-06, "loss": 0.7251, "step": 5266 }, { "epoch": 0.81, "grad_norm": 2.6210489060040496, "learning_rate": 1.9057524492657386e-06, "loss": 0.7473, "step": 5267 }, { "epoch": 0.81, "grad_norm": 2.662061165888899, "learning_rate": 1.902842259940373e-06, "loss": 0.88, "step": 5268 }, { "epoch": 0.81, "grad_norm": 2.5082720864633803, "learning_rate": 1.899934060669285e-06, "loss": 0.6801, "step": 5269 }, { "epoch": 0.81, "grad_norm": 3.0120117717607733, "learning_rate": 1.8970278521672314e-06, "loss": 0.7691, "step": 5270 }, { "epoch": 0.81, "grad_norm": 2.4061591232928166, "learning_rate": 1.8941236351484727e-06, "loss": 0.632, "step": 5271 }, { "epoch": 0.81, "grad_norm": 2.5620260182509225, "learning_rate": 1.891221410326789e-06, "loss": 0.826, "step": 5272 }, { "epoch": 0.81, "grad_norm": 2.6932349967864324, "learning_rate": 1.888321178415463e-06, "loss": 0.7042, "step": 5273 }, { "epoch": 0.81, "grad_norm": 2.5244052706215725, "learning_rate": 1.8854229401272883e-06, "loss": 0.766, "step": 5274 }, { "epoch": 0.81, "grad_norm": 2.7826439118860393, "learning_rate": 1.8825266961745759e-06, "loss": 0.7974, "step": 5275 }, { "epoch": 0.81, "grad_norm": 2.813590114997112, "learning_rate": 1.8796324472691386e-06, "loss": 0.7621, "step": 5276 }, { "epoch": 0.81, "grad_norm": 2.4679542676635147, "learning_rate": 1.8767401941222996e-06, "loss": 0.7581, "step": 5277 }, { "epoch": 0.81, "grad_norm": 2.6626177268115923, "learning_rate": 1.873849937444897e-06, "loss": 0.6859, "step": 5278 }, { "epoch": 0.81, "grad_norm": 2.514550595473596, "learning_rate": 1.870961677947274e-06, "loss": 0.7612, "step": 5279 }, { "epoch": 0.81, "grad_norm": 2.429325244420133, "learning_rate": 1.8680754163392821e-06, "loss": 0.8087, "step": 5280 }, { "epoch": 0.81, "grad_norm": 2.78224370282897, "learning_rate": 1.8651911533302858e-06, "loss": 0.7911, "step": 5281 }, { "epoch": 0.81, "grad_norm": 2.6394580071077534, "learning_rate": 1.8623088896291508e-06, "loss": 0.7007, "step": 5282 }, { "epoch": 0.81, "grad_norm": 3.08473638196854, "learning_rate": 1.859428625944264e-06, "loss": 0.8031, "step": 5283 }, { "epoch": 0.81, "grad_norm": 2.6230923065312197, "learning_rate": 1.8565503629835102e-06, "loss": 0.7139, "step": 5284 }, { "epoch": 0.81, "grad_norm": 2.484455187154463, "learning_rate": 1.8536741014542825e-06, "loss": 0.7813, "step": 5285 }, { "epoch": 0.81, "grad_norm": 2.9025005355173255, "learning_rate": 1.850799842063492e-06, "loss": 0.8687, "step": 5286 }, { "epoch": 0.81, "grad_norm": 2.7855494735084583, "learning_rate": 1.847927585517546e-06, "loss": 0.8703, "step": 5287 }, { "epoch": 0.81, "grad_norm": 2.7774861500730554, "learning_rate": 1.845057332522364e-06, "loss": 0.7098, "step": 5288 }, { "epoch": 0.81, "grad_norm": 2.5571608259583045, "learning_rate": 1.8421890837833789e-06, "loss": 0.7756, "step": 5289 }, { "epoch": 0.81, "grad_norm": 2.6508095224438284, "learning_rate": 1.8393228400055197e-06, "loss": 0.7547, "step": 5290 }, { "epoch": 0.81, "grad_norm": 2.4331685506681624, "learning_rate": 1.836458601893234e-06, "loss": 0.6196, "step": 5291 }, { "epoch": 0.81, "grad_norm": 2.8057215466078, "learning_rate": 1.833596370150469e-06, "loss": 0.8488, "step": 5292 }, { "epoch": 0.81, "grad_norm": 2.8768433031219436, "learning_rate": 1.8307361454806815e-06, "loss": 0.8106, "step": 5293 }, { "epoch": 0.81, "grad_norm": 2.6425605653461894, "learning_rate": 1.8278779285868332e-06, "loss": 0.8511, "step": 5294 }, { "epoch": 0.81, "grad_norm": 3.2196702056179816, "learning_rate": 1.8250217201713938e-06, "loss": 0.7947, "step": 5295 }, { "epoch": 0.81, "grad_norm": 2.6762413209747815, "learning_rate": 1.8221675209363376e-06, "loss": 0.7019, "step": 5296 }, { "epoch": 0.81, "grad_norm": 3.218514063148928, "learning_rate": 1.8193153315831514e-06, "loss": 0.7803, "step": 5297 }, { "epoch": 0.81, "grad_norm": 2.7580589992515314, "learning_rate": 1.8164651528128208e-06, "loss": 0.7302, "step": 5298 }, { "epoch": 0.81, "grad_norm": 2.738998097998686, "learning_rate": 1.8136169853258379e-06, "loss": 0.7869, "step": 5299 }, { "epoch": 0.81, "grad_norm": 2.5623291055732937, "learning_rate": 1.810770829822206e-06, "loss": 0.776, "step": 5300 }, { "epoch": 0.81, "grad_norm": 3.544255059308854, "learning_rate": 1.8079266870014266e-06, "loss": 0.8067, "step": 5301 }, { "epoch": 0.81, "grad_norm": 2.6241190831600134, "learning_rate": 1.8050845575625142e-06, "loss": 0.6629, "step": 5302 }, { "epoch": 0.81, "grad_norm": 2.8966106388304853, "learning_rate": 1.802244442203983e-06, "loss": 0.8091, "step": 5303 }, { "epoch": 0.81, "grad_norm": 2.536447184938207, "learning_rate": 1.7994063416238528e-06, "loss": 0.7255, "step": 5304 }, { "epoch": 0.81, "grad_norm": 2.628368067949291, "learning_rate": 1.7965702565196496e-06, "loss": 0.7316, "step": 5305 }, { "epoch": 0.81, "grad_norm": 2.7213475845824426, "learning_rate": 1.793736187588404e-06, "loss": 0.8211, "step": 5306 }, { "epoch": 0.81, "grad_norm": 2.5743627911226468, "learning_rate": 1.7909041355266465e-06, "loss": 0.7483, "step": 5307 }, { "epoch": 0.81, "grad_norm": 2.6514697681954487, "learning_rate": 1.7880741010304236e-06, "loss": 0.8514, "step": 5308 }, { "epoch": 0.81, "grad_norm": 2.6117066555165516, "learning_rate": 1.7852460847952745e-06, "loss": 0.7341, "step": 5309 }, { "epoch": 0.81, "grad_norm": 2.7937497971549448, "learning_rate": 1.7824200875162435e-06, "loss": 0.8227, "step": 5310 }, { "epoch": 0.81, "grad_norm": 2.6601227382781683, "learning_rate": 1.7795961098878867e-06, "loss": 0.6883, "step": 5311 }, { "epoch": 0.81, "grad_norm": 2.555270181944585, "learning_rate": 1.7767741526042537e-06, "loss": 0.7181, "step": 5312 }, { "epoch": 0.81, "grad_norm": 2.505665661355078, "learning_rate": 1.7739542163589074e-06, "loss": 0.7687, "step": 5313 }, { "epoch": 0.81, "grad_norm": 2.898396487815443, "learning_rate": 1.7711363018449068e-06, "loss": 0.8078, "step": 5314 }, { "epoch": 0.81, "grad_norm": 3.3112582709990637, "learning_rate": 1.7683204097548157e-06, "loss": 0.8103, "step": 5315 }, { "epoch": 0.81, "grad_norm": 2.6724151435389834, "learning_rate": 1.7655065407807025e-06, "loss": 0.7417, "step": 5316 }, { "epoch": 0.81, "grad_norm": 2.6001799710561384, "learning_rate": 1.7626946956141355e-06, "loss": 0.8023, "step": 5317 }, { "epoch": 0.81, "grad_norm": 2.5608651624482253, "learning_rate": 1.759884874946187e-06, "loss": 0.7915, "step": 5318 }, { "epoch": 0.81, "grad_norm": 3.4450320366094944, "learning_rate": 1.7570770794674352e-06, "loss": 0.8027, "step": 5319 }, { "epoch": 0.81, "grad_norm": 2.7623894648539227, "learning_rate": 1.7542713098679564e-06, "loss": 0.7392, "step": 5320 }, { "epoch": 0.81, "grad_norm": 2.7112253465641376, "learning_rate": 1.7514675668373272e-06, "loss": 0.7352, "step": 5321 }, { "epoch": 0.81, "grad_norm": 2.462287508288741, "learning_rate": 1.7486658510646337e-06, "loss": 0.8257, "step": 5322 }, { "epoch": 0.81, "grad_norm": 2.3514294846209958, "learning_rate": 1.7458661632384532e-06, "loss": 0.7096, "step": 5323 }, { "epoch": 0.81, "grad_norm": 2.6405501271234275, "learning_rate": 1.7430685040468775e-06, "loss": 0.755, "step": 5324 }, { "epoch": 0.82, "grad_norm": 2.8005781186999488, "learning_rate": 1.7402728741774887e-06, "loss": 0.681, "step": 5325 }, { "epoch": 0.82, "grad_norm": 2.7722066542218786, "learning_rate": 1.737479274317375e-06, "loss": 0.7788, "step": 5326 }, { "epoch": 0.82, "grad_norm": 2.5659134830313954, "learning_rate": 1.7346877051531241e-06, "loss": 0.7842, "step": 5327 }, { "epoch": 0.82, "grad_norm": 2.438173108512466, "learning_rate": 1.731898167370827e-06, "loss": 0.8067, "step": 5328 }, { "epoch": 0.82, "grad_norm": 2.5327512937131007, "learning_rate": 1.7291106616560693e-06, "loss": 0.7078, "step": 5329 }, { "epoch": 0.82, "grad_norm": 2.836323440661082, "learning_rate": 1.726325188693948e-06, "loss": 0.7786, "step": 5330 }, { "epoch": 0.82, "grad_norm": 2.800206264294481, "learning_rate": 1.7235417491690509e-06, "loss": 0.8309, "step": 5331 }, { "epoch": 0.82, "grad_norm": 2.5362830414927586, "learning_rate": 1.7207603437654674e-06, "loss": 0.6841, "step": 5332 }, { "epoch": 0.82, "grad_norm": 2.426639483725904, "learning_rate": 1.7179809731667952e-06, "loss": 0.7774, "step": 5333 }, { "epoch": 0.82, "grad_norm": 2.562833126398352, "learning_rate": 1.7152036380561176e-06, "loss": 0.7171, "step": 5334 }, { "epoch": 0.82, "grad_norm": 2.7560900086185023, "learning_rate": 1.7124283391160335e-06, "loss": 0.7865, "step": 5335 }, { "epoch": 0.82, "grad_norm": 2.6461379808218592, "learning_rate": 1.7096550770286302e-06, "loss": 0.781, "step": 5336 }, { "epoch": 0.82, "grad_norm": 2.56642023701555, "learning_rate": 1.7068838524754961e-06, "loss": 0.7718, "step": 5337 }, { "epoch": 0.82, "grad_norm": 2.5198940553270335, "learning_rate": 1.7041146661377229e-06, "loss": 0.8147, "step": 5338 }, { "epoch": 0.82, "grad_norm": 3.2246821208943555, "learning_rate": 1.7013475186958983e-06, "loss": 0.7905, "step": 5339 }, { "epoch": 0.82, "grad_norm": 2.488770000107356, "learning_rate": 1.6985824108301063e-06, "loss": 0.7767, "step": 5340 }, { "epoch": 0.82, "grad_norm": 2.579072452946949, "learning_rate": 1.6958193432199377e-06, "loss": 0.714, "step": 5341 }, { "epoch": 0.82, "grad_norm": 2.636564346699858, "learning_rate": 1.6930583165444759e-06, "loss": 0.8383, "step": 5342 }, { "epoch": 0.82, "grad_norm": 2.763894488128038, "learning_rate": 1.6902993314823003e-06, "loss": 0.8249, "step": 5343 }, { "epoch": 0.82, "grad_norm": 2.841520959324338, "learning_rate": 1.6875423887114973e-06, "loss": 0.7667, "step": 5344 }, { "epoch": 0.82, "grad_norm": 2.721729548909902, "learning_rate": 1.6847874889096404e-06, "loss": 0.8535, "step": 5345 }, { "epoch": 0.82, "grad_norm": 2.353445242020036, "learning_rate": 1.682034632753813e-06, "loss": 0.7144, "step": 5346 }, { "epoch": 0.82, "grad_norm": 2.86263924929456, "learning_rate": 1.6792838209205865e-06, "loss": 0.7944, "step": 5347 }, { "epoch": 0.82, "grad_norm": 2.6271226482000403, "learning_rate": 1.6765350540860336e-06, "loss": 0.8035, "step": 5348 }, { "epoch": 0.82, "grad_norm": 2.7871625156641815, "learning_rate": 1.6737883329257242e-06, "loss": 0.7978, "step": 5349 }, { "epoch": 0.82, "grad_norm": 2.7585265680058004, "learning_rate": 1.6710436581147248e-06, "loss": 0.7069, "step": 5350 }, { "epoch": 0.82, "grad_norm": 2.973963568363898, "learning_rate": 1.6683010303275982e-06, "loss": 0.7856, "step": 5351 }, { "epoch": 0.82, "grad_norm": 2.640924319007717, "learning_rate": 1.665560450238408e-06, "loss": 0.7277, "step": 5352 }, { "epoch": 0.82, "grad_norm": 2.6543214807904603, "learning_rate": 1.6628219185207127e-06, "loss": 0.7371, "step": 5353 }, { "epoch": 0.82, "grad_norm": 2.5983459295205256, "learning_rate": 1.6600854358475615e-06, "loss": 0.6777, "step": 5354 }, { "epoch": 0.82, "grad_norm": 2.5050712357400977, "learning_rate": 1.6573510028915118e-06, "loss": 0.7303, "step": 5355 }, { "epoch": 0.82, "grad_norm": 2.581829671919241, "learning_rate": 1.654618620324604e-06, "loss": 0.8259, "step": 5356 }, { "epoch": 0.82, "grad_norm": 2.757472544258648, "learning_rate": 1.6518882888183874e-06, "loss": 0.743, "step": 5357 }, { "epoch": 0.82, "grad_norm": 2.6302986483984796, "learning_rate": 1.6491600090438976e-06, "loss": 0.7763, "step": 5358 }, { "epoch": 0.82, "grad_norm": 2.392032692849635, "learning_rate": 1.646433781671669e-06, "loss": 0.752, "step": 5359 }, { "epoch": 0.82, "grad_norm": 2.4315483130409152, "learning_rate": 1.643709607371733e-06, "loss": 0.7202, "step": 5360 }, { "epoch": 0.82, "grad_norm": 2.6533577527278807, "learning_rate": 1.6409874868136132e-06, "loss": 0.7539, "step": 5361 }, { "epoch": 0.82, "grad_norm": 2.7936297349799672, "learning_rate": 1.6382674206663308e-06, "loss": 0.867, "step": 5362 }, { "epoch": 0.82, "grad_norm": 2.486355890016533, "learning_rate": 1.6355494095984049e-06, "loss": 0.6968, "step": 5363 }, { "epoch": 0.82, "grad_norm": 2.992793892900273, "learning_rate": 1.6328334542778423e-06, "loss": 0.8133, "step": 5364 }, { "epoch": 0.82, "grad_norm": 3.0974063076184413, "learning_rate": 1.6301195553721528e-06, "loss": 0.7223, "step": 5365 }, { "epoch": 0.82, "grad_norm": 2.5535242999454044, "learning_rate": 1.6274077135483336e-06, "loss": 0.8412, "step": 5366 }, { "epoch": 0.82, "grad_norm": 3.0316245210841513, "learning_rate": 1.6246979294728793e-06, "loss": 0.7508, "step": 5367 }, { "epoch": 0.82, "grad_norm": 2.7099534979043196, "learning_rate": 1.6219902038117807e-06, "loss": 0.7069, "step": 5368 }, { "epoch": 0.82, "grad_norm": 3.0971035301260312, "learning_rate": 1.619284537230521e-06, "loss": 0.7463, "step": 5369 }, { "epoch": 0.82, "grad_norm": 3.4461091608804377, "learning_rate": 1.6165809303940761e-06, "loss": 0.8191, "step": 5370 }, { "epoch": 0.82, "grad_norm": 2.8631386451165426, "learning_rate": 1.6138793839669165e-06, "loss": 0.6966, "step": 5371 }, { "epoch": 0.82, "grad_norm": 2.970022929368473, "learning_rate": 1.6111798986130067e-06, "loss": 0.7699, "step": 5372 }, { "epoch": 0.82, "grad_norm": 2.752622230132209, "learning_rate": 1.6084824749958017e-06, "loss": 0.7213, "step": 5373 }, { "epoch": 0.82, "grad_norm": 2.689384664158231, "learning_rate": 1.6057871137782578e-06, "loss": 0.7925, "step": 5374 }, { "epoch": 0.82, "grad_norm": 2.3902986847975325, "learning_rate": 1.603093815622815e-06, "loss": 0.7179, "step": 5375 }, { "epoch": 0.82, "grad_norm": 2.6156407562968806, "learning_rate": 1.6004025811914147e-06, "loss": 0.7825, "step": 5376 }, { "epoch": 0.82, "grad_norm": 2.8870697987925333, "learning_rate": 1.5977134111454839e-06, "loss": 0.7597, "step": 5377 }, { "epoch": 0.82, "grad_norm": 3.5329585787430418, "learning_rate": 1.5950263061459437e-06, "loss": 0.8816, "step": 5378 }, { "epoch": 0.82, "grad_norm": 2.706232341302241, "learning_rate": 1.5923412668532135e-06, "loss": 0.7472, "step": 5379 }, { "epoch": 0.82, "grad_norm": 2.538461844289609, "learning_rate": 1.5896582939271976e-06, "loss": 0.7767, "step": 5380 }, { "epoch": 0.82, "grad_norm": 2.6899633155459206, "learning_rate": 1.5869773880272964e-06, "loss": 0.7833, "step": 5381 }, { "epoch": 0.82, "grad_norm": 2.6689412056515254, "learning_rate": 1.584298549812402e-06, "loss": 0.7906, "step": 5382 }, { "epoch": 0.82, "grad_norm": 2.616921439622051, "learning_rate": 1.5816217799408962e-06, "loss": 0.7398, "step": 5383 }, { "epoch": 0.82, "grad_norm": 2.5860191831094523, "learning_rate": 1.578947079070652e-06, "loss": 0.785, "step": 5384 }, { "epoch": 0.82, "grad_norm": 2.7409743051749684, "learning_rate": 1.576274447859041e-06, "loss": 0.7139, "step": 5385 }, { "epoch": 0.82, "grad_norm": 2.5230935232058465, "learning_rate": 1.5736038869629168e-06, "loss": 0.7318, "step": 5386 }, { "epoch": 0.82, "grad_norm": 2.8231121808138373, "learning_rate": 1.5709353970386322e-06, "loss": 0.885, "step": 5387 }, { "epoch": 0.82, "grad_norm": 2.539737814038569, "learning_rate": 1.5682689787420246e-06, "loss": 0.7765, "step": 5388 }, { "epoch": 0.82, "grad_norm": 2.623946125219109, "learning_rate": 1.5656046327284225e-06, "loss": 0.7303, "step": 5389 }, { "epoch": 0.83, "grad_norm": 2.6049099707804535, "learning_rate": 1.5629423596526528e-06, "loss": 0.8136, "step": 5390 }, { "epoch": 0.83, "grad_norm": 2.759094162043502, "learning_rate": 1.5602821601690254e-06, "loss": 0.7784, "step": 5391 }, { "epoch": 0.83, "grad_norm": 2.8019148720081737, "learning_rate": 1.557624034931342e-06, "loss": 0.7514, "step": 5392 }, { "epoch": 0.83, "grad_norm": 2.559663404527581, "learning_rate": 1.5549679845928956e-06, "loss": 0.8213, "step": 5393 }, { "epoch": 0.83, "grad_norm": 2.6417069273809766, "learning_rate": 1.5523140098064692e-06, "loss": 0.8017, "step": 5394 }, { "epoch": 0.83, "grad_norm": 2.8132581020986205, "learning_rate": 1.5496621112243327e-06, "loss": 0.842, "step": 5395 }, { "epoch": 0.83, "grad_norm": 2.539662982926937, "learning_rate": 1.547012289498252e-06, "loss": 0.8017, "step": 5396 }, { "epoch": 0.83, "grad_norm": 3.2025744239552973, "learning_rate": 1.5443645452794754e-06, "loss": 0.7549, "step": 5397 }, { "epoch": 0.83, "grad_norm": 2.7559697768379205, "learning_rate": 1.5417188792187488e-06, "loss": 0.803, "step": 5398 }, { "epoch": 0.83, "grad_norm": 2.603866972428103, "learning_rate": 1.539075291966301e-06, "loss": 0.9145, "step": 5399 }, { "epoch": 0.83, "grad_norm": 2.600906822519969, "learning_rate": 1.536433784171848e-06, "loss": 0.8172, "step": 5400 }, { "epoch": 0.83, "grad_norm": 2.7699414581326964, "learning_rate": 1.5337943564846035e-06, "loss": 0.716, "step": 5401 }, { "epoch": 0.83, "grad_norm": 2.4063352904234256, "learning_rate": 1.5311570095532636e-06, "loss": 0.8076, "step": 5402 }, { "epoch": 0.83, "grad_norm": 2.5692469504876687, "learning_rate": 1.5285217440260092e-06, "loss": 0.7613, "step": 5403 }, { "epoch": 0.83, "grad_norm": 2.572772925076905, "learning_rate": 1.5258885605505226e-06, "loss": 0.7561, "step": 5404 }, { "epoch": 0.83, "grad_norm": 2.513167661863724, "learning_rate": 1.5232574597739635e-06, "loss": 0.7256, "step": 5405 }, { "epoch": 0.83, "grad_norm": 2.566421922523046, "learning_rate": 1.5206284423429773e-06, "loss": 0.7812, "step": 5406 }, { "epoch": 0.83, "grad_norm": 3.3353428043208844, "learning_rate": 1.5180015089037093e-06, "loss": 0.8176, "step": 5407 }, { "epoch": 0.83, "grad_norm": 2.5207035562280757, "learning_rate": 1.5153766601017816e-06, "loss": 0.8464, "step": 5408 }, { "epoch": 0.83, "grad_norm": 2.5939553659111687, "learning_rate": 1.512753896582313e-06, "loss": 0.7761, "step": 5409 }, { "epoch": 0.83, "grad_norm": 2.4308392420491383, "learning_rate": 1.5101332189899032e-06, "loss": 0.7191, "step": 5410 }, { "epoch": 0.83, "grad_norm": 2.752464101088354, "learning_rate": 1.507514627968638e-06, "loss": 0.7498, "step": 5411 }, { "epoch": 0.83, "grad_norm": 2.573045601314764, "learning_rate": 1.5048981241620996e-06, "loss": 0.7168, "step": 5412 }, { "epoch": 0.83, "grad_norm": 3.374466306394915, "learning_rate": 1.5022837082133479e-06, "loss": 0.8401, "step": 5413 }, { "epoch": 0.83, "grad_norm": 2.908769349069825, "learning_rate": 1.499671380764931e-06, "loss": 0.7582, "step": 5414 }, { "epoch": 0.83, "grad_norm": 2.5939245885102267, "learning_rate": 1.49706114245889e-06, "loss": 0.8097, "step": 5415 }, { "epoch": 0.83, "grad_norm": 5.456523353585106, "learning_rate": 1.494452993936747e-06, "loss": 0.7621, "step": 5416 }, { "epoch": 0.83, "grad_norm": 3.479603609725367, "learning_rate": 1.4918469358395104e-06, "loss": 0.8219, "step": 5417 }, { "epoch": 0.83, "grad_norm": 2.7942623118175147, "learning_rate": 1.4892429688076771e-06, "loss": 0.7696, "step": 5418 }, { "epoch": 0.83, "grad_norm": 2.46734362607315, "learning_rate": 1.486641093481227e-06, "loss": 0.6862, "step": 5419 }, { "epoch": 0.83, "grad_norm": 2.955292126554179, "learning_rate": 1.4840413104996322e-06, "loss": 0.7807, "step": 5420 }, { "epoch": 0.83, "grad_norm": 2.656829684311352, "learning_rate": 1.4814436205018435e-06, "loss": 0.6895, "step": 5421 }, { "epoch": 0.83, "grad_norm": 2.379219039208738, "learning_rate": 1.4788480241262992e-06, "loss": 0.7099, "step": 5422 }, { "epoch": 0.83, "grad_norm": 2.513725790883196, "learning_rate": 1.4762545220109292e-06, "loss": 0.6907, "step": 5423 }, { "epoch": 0.83, "grad_norm": 3.895226797679898, "learning_rate": 1.473663114793139e-06, "loss": 0.7467, "step": 5424 }, { "epoch": 0.83, "grad_norm": 2.5764193868923346, "learning_rate": 1.4710738031098226e-06, "loss": 0.6766, "step": 5425 }, { "epoch": 0.83, "grad_norm": 2.5882262502356967, "learning_rate": 1.4684865875973663e-06, "loss": 0.7805, "step": 5426 }, { "epoch": 0.83, "grad_norm": 2.5245263727879115, "learning_rate": 1.4659014688916306e-06, "loss": 0.7652, "step": 5427 }, { "epoch": 0.83, "grad_norm": 2.4390552809494097, "learning_rate": 1.463318447627966e-06, "loss": 0.6893, "step": 5428 }, { "epoch": 0.83, "grad_norm": 2.4830323556842315, "learning_rate": 1.460737524441207e-06, "loss": 0.8335, "step": 5429 }, { "epoch": 0.83, "grad_norm": 2.580296423991282, "learning_rate": 1.4581586999656706e-06, "loss": 0.7148, "step": 5430 }, { "epoch": 0.83, "grad_norm": 2.531672934729575, "learning_rate": 1.455581974835162e-06, "loss": 0.8116, "step": 5431 }, { "epoch": 0.83, "grad_norm": 3.097529937171779, "learning_rate": 1.4530073496829667e-06, "loss": 0.7705, "step": 5432 }, { "epoch": 0.83, "grad_norm": 2.462077058668238, "learning_rate": 1.4504348251418532e-06, "loss": 0.7132, "step": 5433 }, { "epoch": 0.83, "grad_norm": 2.510920740948573, "learning_rate": 1.4478644018440813e-06, "loss": 0.7715, "step": 5434 }, { "epoch": 0.83, "grad_norm": 2.7183210859443965, "learning_rate": 1.4452960804213844e-06, "loss": 0.7191, "step": 5435 }, { "epoch": 0.83, "grad_norm": 2.4965139774054697, "learning_rate": 1.4427298615049834e-06, "loss": 0.7392, "step": 5436 }, { "epoch": 0.83, "grad_norm": 2.4493008215582424, "learning_rate": 1.440165745725588e-06, "loss": 0.6751, "step": 5437 }, { "epoch": 0.83, "grad_norm": 2.8053401617693123, "learning_rate": 1.4376037337133818e-06, "loss": 0.8177, "step": 5438 }, { "epoch": 0.83, "grad_norm": 2.6707495812946984, "learning_rate": 1.435043826098037e-06, "loss": 0.718, "step": 5439 }, { "epoch": 0.83, "grad_norm": 2.766260855397848, "learning_rate": 1.4324860235087069e-06, "loss": 0.6813, "step": 5440 }, { "epoch": 0.83, "grad_norm": 2.7959908546395873, "learning_rate": 1.4299303265740238e-06, "loss": 0.7618, "step": 5441 }, { "epoch": 0.83, "grad_norm": 2.5790556035434795, "learning_rate": 1.4273767359221125e-06, "loss": 0.7761, "step": 5442 }, { "epoch": 0.83, "grad_norm": 2.667404676892974, "learning_rate": 1.4248252521805716e-06, "loss": 0.7206, "step": 5443 }, { "epoch": 0.83, "grad_norm": 2.9916119664187026, "learning_rate": 1.422275875976482e-06, "loss": 0.6933, "step": 5444 }, { "epoch": 0.83, "grad_norm": 2.9269245266415003, "learning_rate": 1.4197286079364125e-06, "loss": 0.7127, "step": 5445 }, { "epoch": 0.83, "grad_norm": 2.7534623467804686, "learning_rate": 1.4171834486864089e-06, "loss": 0.7684, "step": 5446 }, { "epoch": 0.83, "grad_norm": 2.8876537105947166, "learning_rate": 1.4146403988519963e-06, "loss": 0.7838, "step": 5447 }, { "epoch": 0.83, "grad_norm": 2.8182121582454123, "learning_rate": 1.4120994590581916e-06, "loss": 0.7686, "step": 5448 }, { "epoch": 0.83, "grad_norm": 3.082920784480041, "learning_rate": 1.4095606299294827e-06, "loss": 0.8127, "step": 5449 }, { "epoch": 0.83, "grad_norm": 2.8232693322075004, "learning_rate": 1.4070239120898433e-06, "loss": 0.7653, "step": 5450 }, { "epoch": 0.83, "grad_norm": 3.1856411989759135, "learning_rate": 1.4044893061627263e-06, "loss": 0.8263, "step": 5451 }, { "epoch": 0.83, "grad_norm": 2.676050937118328, "learning_rate": 1.4019568127710659e-06, "loss": 0.6561, "step": 5452 }, { "epoch": 0.83, "grad_norm": 2.6926069696599217, "learning_rate": 1.399426432537283e-06, "loss": 0.7164, "step": 5453 }, { "epoch": 0.83, "grad_norm": 2.633397695011211, "learning_rate": 1.3968981660832693e-06, "loss": 0.7313, "step": 5454 }, { "epoch": 0.83, "grad_norm": 2.7503718548301737, "learning_rate": 1.3943720140304018e-06, "loss": 0.8687, "step": 5455 }, { "epoch": 0.84, "grad_norm": 2.7667013768472226, "learning_rate": 1.3918479769995418e-06, "loss": 0.759, "step": 5456 }, { "epoch": 0.84, "grad_norm": 2.7654801691226893, "learning_rate": 1.3893260556110243e-06, "loss": 0.7636, "step": 5457 }, { "epoch": 0.84, "grad_norm": 2.5775808923066905, "learning_rate": 1.3868062504846646e-06, "loss": 0.7662, "step": 5458 }, { "epoch": 0.84, "grad_norm": 2.6147303801081803, "learning_rate": 1.384288562239765e-06, "loss": 0.7888, "step": 5459 }, { "epoch": 0.84, "grad_norm": 2.555246259881729, "learning_rate": 1.3817729914950995e-06, "loss": 0.7107, "step": 5460 }, { "epoch": 0.84, "grad_norm": 2.6749101810783924, "learning_rate": 1.3792595388689267e-06, "loss": 0.8295, "step": 5461 }, { "epoch": 0.84, "grad_norm": 2.9072465544160893, "learning_rate": 1.3767482049789804e-06, "loss": 0.7472, "step": 5462 }, { "epoch": 0.84, "grad_norm": 2.7431607296729603, "learning_rate": 1.3742389904424747e-06, "loss": 0.7544, "step": 5463 }, { "epoch": 0.84, "grad_norm": 2.4523455376569108, "learning_rate": 1.3717318958761094e-06, "loss": 0.7079, "step": 5464 }, { "epoch": 0.84, "grad_norm": 2.6762219222918024, "learning_rate": 1.3692269218960553e-06, "loss": 0.8438, "step": 5465 }, { "epoch": 0.84, "grad_norm": 2.5159210232208418, "learning_rate": 1.3667240691179618e-06, "loss": 0.8016, "step": 5466 }, { "epoch": 0.84, "grad_norm": 2.7143958324723036, "learning_rate": 1.3642233381569657e-06, "loss": 0.7513, "step": 5467 }, { "epoch": 0.84, "grad_norm": 3.30670046856752, "learning_rate": 1.3617247296276737e-06, "loss": 0.805, "step": 5468 }, { "epoch": 0.84, "grad_norm": 2.711816364111367, "learning_rate": 1.3592282441441707e-06, "loss": 0.6686, "step": 5469 }, { "epoch": 0.84, "grad_norm": 2.8587264851624865, "learning_rate": 1.3567338823200293e-06, "loss": 0.7559, "step": 5470 }, { "epoch": 0.84, "grad_norm": 2.9278007847640493, "learning_rate": 1.3542416447682893e-06, "loss": 0.7923, "step": 5471 }, { "epoch": 0.84, "grad_norm": 2.784064503676124, "learning_rate": 1.351751532101473e-06, "loss": 0.8089, "step": 5472 }, { "epoch": 0.84, "grad_norm": 2.469197716587361, "learning_rate": 1.3492635449315817e-06, "loss": 0.7145, "step": 5473 }, { "epoch": 0.84, "grad_norm": 2.5894330340506713, "learning_rate": 1.3467776838700896e-06, "loss": 0.7227, "step": 5474 }, { "epoch": 0.84, "grad_norm": 2.471371802458738, "learning_rate": 1.344293949527956e-06, "loss": 0.678, "step": 5475 }, { "epoch": 0.84, "grad_norm": 2.798773140145835, "learning_rate": 1.3418123425156115e-06, "loss": 0.7428, "step": 5476 }, { "epoch": 0.84, "grad_norm": 2.5164154651123667, "learning_rate": 1.3393328634429636e-06, "loss": 0.7172, "step": 5477 }, { "epoch": 0.84, "grad_norm": 3.917342816664016, "learning_rate": 1.3368555129194016e-06, "loss": 0.8129, "step": 5478 }, { "epoch": 0.84, "grad_norm": 2.638060194697016, "learning_rate": 1.3343802915537885e-06, "loss": 0.8024, "step": 5479 }, { "epoch": 0.84, "grad_norm": 2.7588261673114265, "learning_rate": 1.3319071999544607e-06, "loss": 0.8135, "step": 5480 }, { "epoch": 0.84, "grad_norm": 2.551590169459782, "learning_rate": 1.3294362387292391e-06, "loss": 0.7706, "step": 5481 }, { "epoch": 0.84, "grad_norm": 2.944194600995512, "learning_rate": 1.3269674084854156e-06, "loss": 0.8076, "step": 5482 }, { "epoch": 0.84, "grad_norm": 2.6657774464142485, "learning_rate": 1.324500709829759e-06, "loss": 0.8025, "step": 5483 }, { "epoch": 0.84, "grad_norm": 3.0273054989836354, "learning_rate": 1.3220361433685137e-06, "loss": 0.8681, "step": 5484 }, { "epoch": 0.84, "grad_norm": 3.2634353936725327, "learning_rate": 1.319573709707399e-06, "loss": 0.8016, "step": 5485 }, { "epoch": 0.84, "grad_norm": 2.773258396954878, "learning_rate": 1.317113409451618e-06, "loss": 0.7462, "step": 5486 }, { "epoch": 0.84, "grad_norm": 2.4504422966436388, "learning_rate": 1.31465524320584e-06, "loss": 0.7048, "step": 5487 }, { "epoch": 0.84, "grad_norm": 2.6226489048346395, "learning_rate": 1.3121992115742122e-06, "loss": 0.7727, "step": 5488 }, { "epoch": 0.84, "grad_norm": 2.499913869641089, "learning_rate": 1.3097453151603602e-06, "loss": 0.8013, "step": 5489 }, { "epoch": 0.84, "grad_norm": 2.7080732867459343, "learning_rate": 1.3072935545673836e-06, "loss": 0.8594, "step": 5490 }, { "epoch": 0.84, "grad_norm": 2.6214148178831653, "learning_rate": 1.3048439303978534e-06, "loss": 0.8599, "step": 5491 }, { "epoch": 0.84, "grad_norm": 2.8228550531261165, "learning_rate": 1.3023964432538216e-06, "loss": 0.7535, "step": 5492 }, { "epoch": 0.84, "grad_norm": 2.6231774742032004, "learning_rate": 1.2999510937368109e-06, "loss": 0.742, "step": 5493 }, { "epoch": 0.84, "grad_norm": 2.7395097723549884, "learning_rate": 1.2975078824478181e-06, "loss": 0.765, "step": 5494 }, { "epoch": 0.84, "grad_norm": 2.5325901699463103, "learning_rate": 1.2950668099873186e-06, "loss": 0.8974, "step": 5495 }, { "epoch": 0.84, "grad_norm": 2.7576079244145997, "learning_rate": 1.2926278769552558e-06, "loss": 0.8176, "step": 5496 }, { "epoch": 0.84, "grad_norm": 2.6983001920413723, "learning_rate": 1.2901910839510557e-06, "loss": 0.7643, "step": 5497 }, { "epoch": 0.84, "grad_norm": 2.7065562167614767, "learning_rate": 1.2877564315736114e-06, "loss": 0.8274, "step": 5498 }, { "epoch": 0.84, "grad_norm": 2.592440020847409, "learning_rate": 1.2853239204212908e-06, "loss": 0.7146, "step": 5499 }, { "epoch": 0.84, "grad_norm": 2.6082475382596924, "learning_rate": 1.2828935510919393e-06, "loss": 0.6917, "step": 5500 }, { "epoch": 0.84, "grad_norm": 2.7716965757094347, "learning_rate": 1.2804653241828724e-06, "loss": 0.7834, "step": 5501 }, { "epoch": 0.84, "grad_norm": 2.4278847914452375, "learning_rate": 1.2780392402908793e-06, "loss": 0.7978, "step": 5502 }, { "epoch": 0.84, "grad_norm": 2.6464039784162, "learning_rate": 1.2756153000122252e-06, "loss": 0.7255, "step": 5503 }, { "epoch": 0.84, "grad_norm": 2.638944099478603, "learning_rate": 1.2731935039426469e-06, "loss": 0.744, "step": 5504 }, { "epoch": 0.84, "grad_norm": 2.65867797896158, "learning_rate": 1.2707738526773528e-06, "loss": 0.7639, "step": 5505 }, { "epoch": 0.84, "grad_norm": 2.81615400279156, "learning_rate": 1.268356346811025e-06, "loss": 0.9008, "step": 5506 }, { "epoch": 0.84, "grad_norm": 2.6510825781269673, "learning_rate": 1.2659409869378159e-06, "loss": 0.818, "step": 5507 }, { "epoch": 0.84, "grad_norm": 2.866416891237285, "learning_rate": 1.2635277736513596e-06, "loss": 0.804, "step": 5508 }, { "epoch": 0.84, "grad_norm": 3.0494698466774426, "learning_rate": 1.2611167075447527e-06, "loss": 0.7794, "step": 5509 }, { "epoch": 0.84, "grad_norm": 2.7386950159586627, "learning_rate": 1.258707789210566e-06, "loss": 0.7816, "step": 5510 }, { "epoch": 0.84, "grad_norm": 2.785828567783617, "learning_rate": 1.2563010192408487e-06, "loss": 0.7821, "step": 5511 }, { "epoch": 0.84, "grad_norm": 2.5718903666323296, "learning_rate": 1.2538963982271135e-06, "loss": 0.7565, "step": 5512 }, { "epoch": 0.84, "grad_norm": 3.124178037199288, "learning_rate": 1.2514939267603489e-06, "loss": 0.823, "step": 5513 }, { "epoch": 0.84, "grad_norm": 2.7898308110383474, "learning_rate": 1.2490936054310176e-06, "loss": 0.7378, "step": 5514 }, { "epoch": 0.84, "grad_norm": 2.4373883835923507, "learning_rate": 1.2466954348290473e-06, "loss": 0.7807, "step": 5515 }, { "epoch": 0.84, "grad_norm": 2.466570363766371, "learning_rate": 1.244299415543846e-06, "loss": 0.7284, "step": 5516 }, { "epoch": 0.84, "grad_norm": 2.5189153820994323, "learning_rate": 1.2419055481642873e-06, "loss": 0.7953, "step": 5517 }, { "epoch": 0.84, "grad_norm": 2.6885918862881444, "learning_rate": 1.2395138332787105e-06, "loss": 0.7116, "step": 5518 }, { "epoch": 0.84, "grad_norm": 2.5023705356369255, "learning_rate": 1.2371242714749388e-06, "loss": 0.7292, "step": 5519 }, { "epoch": 0.84, "grad_norm": 2.5056392809495147, "learning_rate": 1.2347368633402578e-06, "loss": 0.7093, "step": 5520 }, { "epoch": 0.85, "grad_norm": 2.6732155135175915, "learning_rate": 1.2323516094614218e-06, "loss": 0.8043, "step": 5521 }, { "epoch": 0.85, "grad_norm": 2.5131745570401867, "learning_rate": 1.229968510424665e-06, "loss": 0.6928, "step": 5522 }, { "epoch": 0.85, "grad_norm": 2.3139445552542974, "learning_rate": 1.2275875668156812e-06, "loss": 0.7818, "step": 5523 }, { "epoch": 0.85, "grad_norm": 2.5637986429478112, "learning_rate": 1.2252087792196432e-06, "loss": 0.624, "step": 5524 }, { "epoch": 0.85, "grad_norm": 2.720954635132861, "learning_rate": 1.2228321482211903e-06, "loss": 0.7613, "step": 5525 }, { "epoch": 0.85, "grad_norm": 3.421612728710161, "learning_rate": 1.2204576744044284e-06, "loss": 0.802, "step": 5526 }, { "epoch": 0.85, "grad_norm": 2.727338927023116, "learning_rate": 1.2180853583529394e-06, "loss": 0.826, "step": 5527 }, { "epoch": 0.85, "grad_norm": 2.9617344064018982, "learning_rate": 1.215715200649773e-06, "loss": 0.7746, "step": 5528 }, { "epoch": 0.85, "grad_norm": 2.6211203203398132, "learning_rate": 1.2133472018774439e-06, "loss": 0.7474, "step": 5529 }, { "epoch": 0.85, "grad_norm": 3.060772179332325, "learning_rate": 1.2109813626179434e-06, "loss": 0.7757, "step": 5530 }, { "epoch": 0.85, "grad_norm": 2.4640680019112717, "learning_rate": 1.2086176834527252e-06, "loss": 0.7388, "step": 5531 }, { "epoch": 0.85, "grad_norm": 2.4614625902107177, "learning_rate": 1.2062561649627158e-06, "loss": 0.7391, "step": 5532 }, { "epoch": 0.85, "grad_norm": 2.6876423721050724, "learning_rate": 1.203896807728313e-06, "loss": 0.773, "step": 5533 }, { "epoch": 0.85, "grad_norm": 2.7163326211812153, "learning_rate": 1.2015396123293766e-06, "loss": 0.8139, "step": 5534 }, { "epoch": 0.85, "grad_norm": 2.71148568189279, "learning_rate": 1.1991845793452438e-06, "loss": 0.7613, "step": 5535 }, { "epoch": 0.85, "grad_norm": 2.751931038453203, "learning_rate": 1.1968317093547133e-06, "loss": 0.8681, "step": 5536 }, { "epoch": 0.85, "grad_norm": 2.6658751770376754, "learning_rate": 1.1944810029360532e-06, "loss": 0.7588, "step": 5537 }, { "epoch": 0.85, "grad_norm": 2.668427297252355, "learning_rate": 1.1921324606670037e-06, "loss": 0.7865, "step": 5538 }, { "epoch": 0.85, "grad_norm": 2.89947410474797, "learning_rate": 1.1897860831247686e-06, "loss": 0.7112, "step": 5539 }, { "epoch": 0.85, "grad_norm": 2.5563106453481006, "learning_rate": 1.1874418708860237e-06, "loss": 0.7953, "step": 5540 }, { "epoch": 0.85, "grad_norm": 2.702224895601683, "learning_rate": 1.1850998245269096e-06, "loss": 0.8173, "step": 5541 }, { "epoch": 0.85, "grad_norm": 2.7219772816758465, "learning_rate": 1.1827599446230354e-06, "loss": 0.7415, "step": 5542 }, { "epoch": 0.85, "grad_norm": 2.811823473183657, "learning_rate": 1.180422231749475e-06, "loss": 0.7129, "step": 5543 }, { "epoch": 0.85, "grad_norm": 2.8051088616260356, "learning_rate": 1.1780866864807795e-06, "loss": 0.7613, "step": 5544 }, { "epoch": 0.85, "grad_norm": 2.516067073289398, "learning_rate": 1.1757533093909535e-06, "loss": 0.768, "step": 5545 }, { "epoch": 0.85, "grad_norm": 2.5930686110231003, "learning_rate": 1.1734221010534807e-06, "loss": 0.7489, "step": 5546 }, { "epoch": 0.85, "grad_norm": 2.893175999553963, "learning_rate": 1.1710930620413053e-06, "loss": 0.7489, "step": 5547 }, { "epoch": 0.85, "grad_norm": 2.8849620602373243, "learning_rate": 1.1687661929268367e-06, "loss": 0.7634, "step": 5548 }, { "epoch": 0.85, "grad_norm": 2.4784255720944017, "learning_rate": 1.166441494281959e-06, "loss": 0.7371, "step": 5549 }, { "epoch": 0.85, "grad_norm": 2.771587298983736, "learning_rate": 1.1641189666780151e-06, "loss": 0.7193, "step": 5550 }, { "epoch": 0.85, "grad_norm": 2.8362983127978527, "learning_rate": 1.161798610685818e-06, "loss": 0.8037, "step": 5551 }, { "epoch": 0.85, "grad_norm": 2.583812039759552, "learning_rate": 1.1594804268756455e-06, "loss": 0.6902, "step": 5552 }, { "epoch": 0.85, "grad_norm": 2.7400986861722365, "learning_rate": 1.1571644158172435e-06, "loss": 0.7665, "step": 5553 }, { "epoch": 0.85, "grad_norm": 2.8029871469307004, "learning_rate": 1.154850578079818e-06, "loss": 0.7821, "step": 5554 }, { "epoch": 0.85, "grad_norm": 2.5630396676092575, "learning_rate": 1.152538914232052e-06, "loss": 0.6432, "step": 5555 }, { "epoch": 0.85, "grad_norm": 2.754520435755249, "learning_rate": 1.150229424842082e-06, "loss": 0.7511, "step": 5556 }, { "epoch": 0.85, "grad_norm": 2.668987688707433, "learning_rate": 1.1479221104775195e-06, "loss": 0.7309, "step": 5557 }, { "epoch": 0.85, "grad_norm": 2.5638068256997326, "learning_rate": 1.1456169717054378e-06, "loss": 0.6905, "step": 5558 }, { "epoch": 0.85, "grad_norm": 2.690726492060148, "learning_rate": 1.143314009092371e-06, "loss": 0.7855, "step": 5559 }, { "epoch": 0.85, "grad_norm": 2.4735568575923454, "learning_rate": 1.141013223204328e-06, "loss": 0.6855, "step": 5560 }, { "epoch": 0.85, "grad_norm": 2.5690760259180982, "learning_rate": 1.138714614606775e-06, "loss": 0.7458, "step": 5561 }, { "epoch": 0.85, "grad_norm": 2.4077556748136186, "learning_rate": 1.136418183864646e-06, "loss": 0.7496, "step": 5562 }, { "epoch": 0.85, "grad_norm": 2.468467344759974, "learning_rate": 1.134123931542339e-06, "loss": 0.7601, "step": 5563 }, { "epoch": 0.85, "grad_norm": 2.7634747107670807, "learning_rate": 1.1318318582037168e-06, "loss": 0.7783, "step": 5564 }, { "epoch": 0.85, "grad_norm": 2.559302157414664, "learning_rate": 1.129541964412104e-06, "loss": 0.756, "step": 5565 }, { "epoch": 0.85, "grad_norm": 2.699526488995371, "learning_rate": 1.1272542507302985e-06, "loss": 0.7129, "step": 5566 }, { "epoch": 0.85, "grad_norm": 2.974066739013522, "learning_rate": 1.1249687177205493e-06, "loss": 0.8527, "step": 5567 }, { "epoch": 0.85, "grad_norm": 2.9764748728047463, "learning_rate": 1.1226853659445824e-06, "loss": 0.7279, "step": 5568 }, { "epoch": 0.85, "grad_norm": 2.857166506430615, "learning_rate": 1.1204041959635791e-06, "loss": 0.7442, "step": 5569 }, { "epoch": 0.85, "grad_norm": 2.896244832002908, "learning_rate": 1.1181252083381844e-06, "loss": 0.7165, "step": 5570 }, { "epoch": 0.85, "grad_norm": 2.3254418710580094, "learning_rate": 1.1158484036285134e-06, "loss": 0.7253, "step": 5571 }, { "epoch": 0.85, "grad_norm": 2.4974170064587518, "learning_rate": 1.1135737823941405e-06, "loss": 0.7949, "step": 5572 }, { "epoch": 0.85, "grad_norm": 2.777471113005515, "learning_rate": 1.1113013451941024e-06, "loss": 0.7766, "step": 5573 }, { "epoch": 0.85, "grad_norm": 2.921270848027808, "learning_rate": 1.1090310925869009e-06, "loss": 0.7718, "step": 5574 }, { "epoch": 0.85, "grad_norm": 2.701574184000981, "learning_rate": 1.1067630251304996e-06, "loss": 0.8679, "step": 5575 }, { "epoch": 0.85, "grad_norm": 2.787188494578665, "learning_rate": 1.104497143382325e-06, "loss": 0.7404, "step": 5576 }, { "epoch": 0.85, "grad_norm": 2.551177086497416, "learning_rate": 1.1022334478992702e-06, "loss": 0.7234, "step": 5577 }, { "epoch": 0.85, "grad_norm": 3.232648327249752, "learning_rate": 1.099971939237685e-06, "loss": 0.759, "step": 5578 }, { "epoch": 0.85, "grad_norm": 2.5073434658427947, "learning_rate": 1.0977126179533892e-06, "loss": 0.7839, "step": 5579 }, { "epoch": 0.85, "grad_norm": 2.541437023443651, "learning_rate": 1.0954554846016575e-06, "loss": 0.7859, "step": 5580 }, { "epoch": 0.85, "grad_norm": 2.5804987815371825, "learning_rate": 1.0932005397372282e-06, "loss": 0.7573, "step": 5581 }, { "epoch": 0.85, "grad_norm": 2.838112090513552, "learning_rate": 1.090947783914308e-06, "loss": 0.7422, "step": 5582 }, { "epoch": 0.85, "grad_norm": 2.659588045163355, "learning_rate": 1.0886972176865585e-06, "loss": 0.7169, "step": 5583 }, { "epoch": 0.85, "grad_norm": 2.6839338857706836, "learning_rate": 1.0864488416071061e-06, "loss": 0.731, "step": 5584 }, { "epoch": 0.85, "grad_norm": 2.3940028914051665, "learning_rate": 1.084202656228538e-06, "loss": 0.6844, "step": 5585 }, { "epoch": 0.86, "grad_norm": 2.7620213694437195, "learning_rate": 1.0819586621029043e-06, "loss": 0.7331, "step": 5586 }, { "epoch": 0.86, "grad_norm": 2.774375832645594, "learning_rate": 1.0797168597817143e-06, "loss": 0.6923, "step": 5587 }, { "epoch": 0.86, "grad_norm": 3.1324940080961343, "learning_rate": 1.0774772498159424e-06, "loss": 0.7889, "step": 5588 }, { "epoch": 0.86, "grad_norm": 2.439717577235111, "learning_rate": 1.0752398327560199e-06, "loss": 0.6448, "step": 5589 }, { "epoch": 0.86, "grad_norm": 2.6372831755194044, "learning_rate": 1.0730046091518442e-06, "loss": 0.7257, "step": 5590 }, { "epoch": 0.86, "grad_norm": 2.809764341873967, "learning_rate": 1.0707715795527685e-06, "loss": 0.7913, "step": 5591 }, { "epoch": 0.86, "grad_norm": 2.530267851629339, "learning_rate": 1.0685407445076067e-06, "loss": 0.6289, "step": 5592 }, { "epoch": 0.86, "grad_norm": 2.7837106686303845, "learning_rate": 1.0663121045646397e-06, "loss": 0.7535, "step": 5593 }, { "epoch": 0.86, "grad_norm": 2.8086295330354774, "learning_rate": 1.0640856602716021e-06, "loss": 0.7656, "step": 5594 }, { "epoch": 0.86, "grad_norm": 2.4661618844981947, "learning_rate": 1.0618614121756932e-06, "loss": 0.7703, "step": 5595 }, { "epoch": 0.86, "grad_norm": 2.8436514864476696, "learning_rate": 1.059639360823569e-06, "loss": 0.8581, "step": 5596 }, { "epoch": 0.86, "grad_norm": 3.331913971552762, "learning_rate": 1.057419506761347e-06, "loss": 0.815, "step": 5597 }, { "epoch": 0.86, "grad_norm": 2.9949193504323883, "learning_rate": 1.0552018505346074e-06, "loss": 0.7474, "step": 5598 }, { "epoch": 0.86, "grad_norm": 2.6932560263632497, "learning_rate": 1.0529863926883865e-06, "loss": 0.8571, "step": 5599 }, { "epoch": 0.86, "grad_norm": 2.72659374994844, "learning_rate": 1.05077313376718e-06, "loss": 0.6961, "step": 5600 }, { "epoch": 0.86, "grad_norm": 2.386739434798129, "learning_rate": 1.0485620743149494e-06, "loss": 0.7768, "step": 5601 }, { "epoch": 0.86, "grad_norm": 2.7055649797455272, "learning_rate": 1.0463532148751076e-06, "loss": 0.7969, "step": 5602 }, { "epoch": 0.86, "grad_norm": 2.753211825418324, "learning_rate": 1.0441465559905295e-06, "loss": 0.7847, "step": 5603 }, { "epoch": 0.86, "grad_norm": 2.629614530438831, "learning_rate": 1.0419420982035545e-06, "loss": 0.7199, "step": 5604 }, { "epoch": 0.86, "grad_norm": 2.634367194061076, "learning_rate": 1.0397398420559724e-06, "loss": 0.7798, "step": 5605 }, { "epoch": 0.86, "grad_norm": 2.7751002146810633, "learning_rate": 1.037539788089037e-06, "loss": 0.763, "step": 5606 }, { "epoch": 0.86, "grad_norm": 2.6799197719112042, "learning_rate": 1.0353419368434614e-06, "loss": 0.7823, "step": 5607 }, { "epoch": 0.86, "grad_norm": 2.7524900554080403, "learning_rate": 1.0331462888594112e-06, "loss": 0.8375, "step": 5608 }, { "epoch": 0.86, "grad_norm": 2.917619249390206, "learning_rate": 1.0309528446765206e-06, "loss": 0.7743, "step": 5609 }, { "epoch": 0.86, "grad_norm": 2.8815542439790574, "learning_rate": 1.0287616048338743e-06, "loss": 0.8678, "step": 5610 }, { "epoch": 0.86, "grad_norm": 2.8823740915476574, "learning_rate": 1.026572569870017e-06, "loss": 0.7844, "step": 5611 }, { "epoch": 0.86, "grad_norm": 2.7691903823345037, "learning_rate": 1.024385740322954e-06, "loss": 0.6978, "step": 5612 }, { "epoch": 0.86, "grad_norm": 2.748898140798564, "learning_rate": 1.022201116730145e-06, "loss": 0.8205, "step": 5613 }, { "epoch": 0.86, "grad_norm": 2.502627608195643, "learning_rate": 1.0200186996285077e-06, "loss": 0.7322, "step": 5614 }, { "epoch": 0.86, "grad_norm": 2.7651574060507675, "learning_rate": 1.0178384895544235e-06, "loss": 0.7698, "step": 5615 }, { "epoch": 0.86, "grad_norm": 2.6948552555209138, "learning_rate": 1.0156604870437247e-06, "loss": 0.8678, "step": 5616 }, { "epoch": 0.86, "grad_norm": 2.4570240141927884, "learning_rate": 1.0134846926317022e-06, "loss": 0.7441, "step": 5617 }, { "epoch": 0.86, "grad_norm": 2.5496315754472625, "learning_rate": 1.0113111068531068e-06, "loss": 0.7771, "step": 5618 }, { "epoch": 0.86, "grad_norm": 2.4844705154268096, "learning_rate": 1.0091397302421412e-06, "loss": 0.6951, "step": 5619 }, { "epoch": 0.86, "grad_norm": 2.9074753927062607, "learning_rate": 1.0069705633324745e-06, "loss": 0.8395, "step": 5620 }, { "epoch": 0.86, "grad_norm": 2.870455798197107, "learning_rate": 1.004803606657223e-06, "loss": 0.7325, "step": 5621 }, { "epoch": 0.86, "grad_norm": 2.792599450580263, "learning_rate": 1.0026388607489646e-06, "loss": 0.7905, "step": 5622 }, { "epoch": 0.86, "grad_norm": 3.0586931850734733, "learning_rate": 1.0004763261397355e-06, "loss": 0.8382, "step": 5623 }, { "epoch": 0.86, "grad_norm": 2.816909936823562, "learning_rate": 9.98316003361025e-07, "loss": 0.8835, "step": 5624 }, { "epoch": 0.86, "grad_norm": 2.657891923324879, "learning_rate": 9.961578929437764e-07, "loss": 0.7731, "step": 5625 }, { "epoch": 0.86, "grad_norm": 2.7264248273390295, "learning_rate": 9.940019954183977e-07, "loss": 0.8202, "step": 5626 }, { "epoch": 0.86, "grad_norm": 2.6457426310497985, "learning_rate": 9.918483113147447e-07, "loss": 0.7751, "step": 5627 }, { "epoch": 0.86, "grad_norm": 3.0766698726447506, "learning_rate": 9.896968411621332e-07, "loss": 0.8389, "step": 5628 }, { "epoch": 0.86, "grad_norm": 2.844765688371593, "learning_rate": 9.87547585489338e-07, "loss": 0.721, "step": 5629 }, { "epoch": 0.86, "grad_norm": 2.4495238218117894, "learning_rate": 9.854005448245796e-07, "loss": 0.6981, "step": 5630 }, { "epoch": 0.86, "grad_norm": 2.43118087190644, "learning_rate": 9.832557196955438e-07, "loss": 0.754, "step": 5631 }, { "epoch": 0.86, "grad_norm": 2.7459618919864357, "learning_rate": 9.811131106293691e-07, "loss": 0.7378, "step": 5632 }, { "epoch": 0.86, "grad_norm": 2.6027613647952124, "learning_rate": 9.789727181526453e-07, "loss": 0.767, "step": 5633 }, { "epoch": 0.86, "grad_norm": 2.827180784194871, "learning_rate": 9.76834542791425e-07, "loss": 0.826, "step": 5634 }, { "epoch": 0.86, "grad_norm": 2.2869195172489576, "learning_rate": 9.746985850712099e-07, "loss": 0.7602, "step": 5635 }, { "epoch": 0.86, "grad_norm": 2.518787706950005, "learning_rate": 9.725648455169568e-07, "loss": 0.7141, "step": 5636 }, { "epoch": 0.86, "grad_norm": 2.909684582981514, "learning_rate": 9.704333246530828e-07, "loss": 0.7257, "step": 5637 }, { "epoch": 0.86, "grad_norm": 3.0101097572734803, "learning_rate": 9.683040230034536e-07, "loss": 0.7605, "step": 5638 }, { "epoch": 0.86, "grad_norm": 2.8883948525240966, "learning_rate": 9.661769410913913e-07, "loss": 0.8731, "step": 5639 }, { "epoch": 0.86, "grad_norm": 2.799359360157228, "learning_rate": 9.640520794396746e-07, "loss": 0.7386, "step": 5640 }, { "epoch": 0.86, "grad_norm": 2.515485834092157, "learning_rate": 9.619294385705336e-07, "loss": 0.7184, "step": 5641 }, { "epoch": 0.86, "grad_norm": 2.3554919634425526, "learning_rate": 9.598090190056553e-07, "loss": 0.6601, "step": 5642 }, { "epoch": 0.86, "grad_norm": 2.5777835963991556, "learning_rate": 9.576908212661784e-07, "loss": 0.7231, "step": 5643 }, { "epoch": 0.86, "grad_norm": 2.544230852123378, "learning_rate": 9.555748458726944e-07, "loss": 0.7102, "step": 5644 }, { "epoch": 0.86, "grad_norm": 2.4889271852604997, "learning_rate": 9.534610933452548e-07, "loss": 0.7839, "step": 5645 }, { "epoch": 0.86, "grad_norm": 3.011943460171467, "learning_rate": 9.513495642033599e-07, "loss": 0.755, "step": 5646 }, { "epoch": 0.86, "grad_norm": 2.5838351272639097, "learning_rate": 9.4924025896596e-07, "loss": 0.8452, "step": 5647 }, { "epoch": 0.86, "grad_norm": 2.513812262444184, "learning_rate": 9.471331781514681e-07, "loss": 0.779, "step": 5648 }, { "epoch": 0.86, "grad_norm": 2.6039552895971103, "learning_rate": 9.450283222777445e-07, "loss": 0.7665, "step": 5649 }, { "epoch": 0.86, "grad_norm": 2.5535137404506805, "learning_rate": 9.429256918621011e-07, "loss": 0.7957, "step": 5650 }, { "epoch": 0.86, "grad_norm": 2.814064035545963, "learning_rate": 9.408252874213097e-07, "loss": 0.7728, "step": 5651 }, { "epoch": 0.87, "grad_norm": 2.437786969150219, "learning_rate": 9.387271094715877e-07, "loss": 0.735, "step": 5652 }, { "epoch": 0.87, "grad_norm": 2.7107592789968256, "learning_rate": 9.366311585286103e-07, "loss": 0.7294, "step": 5653 }, { "epoch": 0.87, "grad_norm": 2.6797509758975924, "learning_rate": 9.345374351075009e-07, "loss": 0.8449, "step": 5654 }, { "epoch": 0.87, "grad_norm": 2.702157913427348, "learning_rate": 9.324459397228391e-07, "loss": 0.7923, "step": 5655 }, { "epoch": 0.87, "grad_norm": 2.667032279356962, "learning_rate": 9.303566728886571e-07, "loss": 0.7211, "step": 5656 }, { "epoch": 0.87, "grad_norm": 2.483166462186134, "learning_rate": 9.282696351184383e-07, "loss": 0.6771, "step": 5657 }, { "epoch": 0.87, "grad_norm": 2.632341617741301, "learning_rate": 9.26184826925114e-07, "loss": 0.7805, "step": 5658 }, { "epoch": 0.87, "grad_norm": 2.803600915223199, "learning_rate": 9.241022488210772e-07, "loss": 0.7631, "step": 5659 }, { "epoch": 0.87, "grad_norm": 2.668583864035917, "learning_rate": 9.220219013181642e-07, "loss": 0.6902, "step": 5660 }, { "epoch": 0.87, "grad_norm": 2.9654201592454927, "learning_rate": 9.199437849276649e-07, "loss": 0.7293, "step": 5661 }, { "epoch": 0.87, "grad_norm": 2.4537593166619436, "learning_rate": 9.178679001603252e-07, "loss": 0.6717, "step": 5662 }, { "epoch": 0.87, "grad_norm": 2.7924386936278873, "learning_rate": 9.15794247526337e-07, "loss": 0.8249, "step": 5663 }, { "epoch": 0.87, "grad_norm": 2.5150201116445263, "learning_rate": 9.137228275353471e-07, "loss": 0.715, "step": 5664 }, { "epoch": 0.87, "grad_norm": 2.782277092977421, "learning_rate": 9.116536406964527e-07, "loss": 0.8094, "step": 5665 }, { "epoch": 0.87, "grad_norm": 2.6310827622808963, "learning_rate": 9.095866875181991e-07, "loss": 0.6985, "step": 5666 }, { "epoch": 0.87, "grad_norm": 2.7013028558305043, "learning_rate": 9.07521968508589e-07, "loss": 0.7311, "step": 5667 }, { "epoch": 0.87, "grad_norm": 2.504527699009366, "learning_rate": 9.054594841750707e-07, "loss": 0.6948, "step": 5668 }, { "epoch": 0.87, "grad_norm": 2.9194448959018002, "learning_rate": 9.033992350245435e-07, "loss": 0.8246, "step": 5669 }, { "epoch": 0.87, "grad_norm": 2.580593402067048, "learning_rate": 9.013412215633633e-07, "loss": 0.7519, "step": 5670 }, { "epoch": 0.87, "grad_norm": 2.645194130581552, "learning_rate": 8.992854442973264e-07, "loss": 0.7213, "step": 5671 }, { "epoch": 0.87, "grad_norm": 2.4402723740062746, "learning_rate": 8.972319037316901e-07, "loss": 0.6859, "step": 5672 }, { "epoch": 0.87, "grad_norm": 2.6777006267138024, "learning_rate": 8.95180600371156e-07, "loss": 0.7776, "step": 5673 }, { "epoch": 0.87, "grad_norm": 2.6668439690078825, "learning_rate": 8.931315347198754e-07, "loss": 0.7065, "step": 5674 }, { "epoch": 0.87, "grad_norm": 2.558904510622878, "learning_rate": 8.910847072814521e-07, "loss": 0.8571, "step": 5675 }, { "epoch": 0.87, "grad_norm": 2.8898235434369774, "learning_rate": 8.890401185589393e-07, "loss": 0.7485, "step": 5676 }, { "epoch": 0.87, "grad_norm": 2.6045615465844207, "learning_rate": 8.86997769054836e-07, "loss": 0.74, "step": 5677 }, { "epoch": 0.87, "grad_norm": 2.5371702561275455, "learning_rate": 8.849576592710996e-07, "loss": 0.7178, "step": 5678 }, { "epoch": 0.87, "grad_norm": 2.9241583684169945, "learning_rate": 8.82919789709129e-07, "loss": 0.7445, "step": 5679 }, { "epoch": 0.87, "grad_norm": 2.5870081381342502, "learning_rate": 8.808841608697749e-07, "loss": 0.8143, "step": 5680 }, { "epoch": 0.87, "grad_norm": 2.6290340495413433, "learning_rate": 8.788507732533413e-07, "loss": 0.8404, "step": 5681 }, { "epoch": 0.87, "grad_norm": 2.592317293842339, "learning_rate": 8.768196273595719e-07, "loss": 0.7048, "step": 5682 }, { "epoch": 0.87, "grad_norm": 2.6255917103261073, "learning_rate": 8.747907236876718e-07, "loss": 0.8538, "step": 5683 }, { "epoch": 0.87, "grad_norm": 2.8929535869985923, "learning_rate": 8.727640627362854e-07, "loss": 0.7605, "step": 5684 }, { "epoch": 0.87, "grad_norm": 2.5282572880948555, "learning_rate": 8.707396450035099e-07, "loss": 0.6847, "step": 5685 }, { "epoch": 0.87, "grad_norm": 2.571181113520221, "learning_rate": 8.687174709868895e-07, "loss": 0.821, "step": 5686 }, { "epoch": 0.87, "grad_norm": 2.640049537712735, "learning_rate": 8.666975411834188e-07, "loss": 0.7872, "step": 5687 }, { "epoch": 0.87, "grad_norm": 3.15738494350599, "learning_rate": 8.646798560895376e-07, "loss": 0.8605, "step": 5688 }, { "epoch": 0.87, "grad_norm": 2.660662027497053, "learning_rate": 8.626644162011399e-07, "loss": 0.7281, "step": 5689 }, { "epoch": 0.87, "grad_norm": 2.6983042556708496, "learning_rate": 8.606512220135621e-07, "loss": 0.7855, "step": 5690 }, { "epoch": 0.87, "grad_norm": 2.4483957728925616, "learning_rate": 8.586402740215893e-07, "loss": 0.7887, "step": 5691 }, { "epoch": 0.87, "grad_norm": 2.778073690955736, "learning_rate": 8.566315727194607e-07, "loss": 0.8097, "step": 5692 }, { "epoch": 0.87, "grad_norm": 2.7771629099321484, "learning_rate": 8.546251186008536e-07, "loss": 0.701, "step": 5693 }, { "epoch": 0.87, "grad_norm": 2.498328459699538, "learning_rate": 8.526209121589024e-07, "loss": 0.7905, "step": 5694 }, { "epoch": 0.87, "grad_norm": 2.6098896557905844, "learning_rate": 8.506189538861831e-07, "loss": 0.8243, "step": 5695 }, { "epoch": 0.87, "grad_norm": 2.677295059627817, "learning_rate": 8.486192442747221e-07, "loss": 0.7308, "step": 5696 }, { "epoch": 0.87, "grad_norm": 2.5776146690712345, "learning_rate": 8.466217838159896e-07, "loss": 0.8068, "step": 5697 }, { "epoch": 0.87, "grad_norm": 2.571734748384213, "learning_rate": 8.446265730009074e-07, "loss": 0.6534, "step": 5698 }, { "epoch": 0.87, "grad_norm": 2.7287346704837434, "learning_rate": 8.426336123198386e-07, "loss": 0.7079, "step": 5699 }, { "epoch": 0.87, "grad_norm": 2.5595723305614015, "learning_rate": 8.406429022626028e-07, "loss": 0.7684, "step": 5700 }, { "epoch": 0.87, "grad_norm": 2.519081924016723, "learning_rate": 8.386544433184573e-07, "loss": 0.7304, "step": 5701 }, { "epoch": 0.87, "grad_norm": 2.6865616162166055, "learning_rate": 8.36668235976108e-07, "loss": 0.6706, "step": 5702 }, { "epoch": 0.87, "grad_norm": 2.6769547229461383, "learning_rate": 8.346842807237132e-07, "loss": 0.799, "step": 5703 }, { "epoch": 0.87, "grad_norm": 2.525744181272662, "learning_rate": 8.327025780488696e-07, "loss": 0.6805, "step": 5704 }, { "epoch": 0.87, "grad_norm": 2.4046255837962276, "learning_rate": 8.307231284386264e-07, "loss": 0.7225, "step": 5705 }, { "epoch": 0.87, "grad_norm": 2.515854941221048, "learning_rate": 8.287459323794777e-07, "loss": 0.661, "step": 5706 }, { "epoch": 0.87, "grad_norm": 2.4848884090304417, "learning_rate": 8.267709903573606e-07, "loss": 0.7989, "step": 5707 }, { "epoch": 0.87, "grad_norm": 2.8341237442734837, "learning_rate": 8.247983028576612e-07, "loss": 0.8496, "step": 5708 }, { "epoch": 0.87, "grad_norm": 2.7306413767137143, "learning_rate": 8.228278703652115e-07, "loss": 0.743, "step": 5709 }, { "epoch": 0.87, "grad_norm": 2.7006097693456637, "learning_rate": 8.208596933642854e-07, "loss": 0.7519, "step": 5710 }, { "epoch": 0.87, "grad_norm": 2.667161566314598, "learning_rate": 8.188937723386104e-07, "loss": 0.7382, "step": 5711 }, { "epoch": 0.87, "grad_norm": 2.4897380903135375, "learning_rate": 8.16930107771352e-07, "loss": 0.6433, "step": 5712 }, { "epoch": 0.87, "grad_norm": 2.6686472482951227, "learning_rate": 8.149687001451223e-07, "loss": 0.7862, "step": 5713 }, { "epoch": 0.87, "grad_norm": 2.6432076739400863, "learning_rate": 8.130095499419843e-07, "loss": 0.768, "step": 5714 }, { "epoch": 0.87, "grad_norm": 2.623041787253728, "learning_rate": 8.110526576434386e-07, "loss": 0.8018, "step": 5715 }, { "epoch": 0.87, "grad_norm": 2.361320310110989, "learning_rate": 8.090980237304369e-07, "loss": 0.693, "step": 5716 }, { "epoch": 0.88, "grad_norm": 2.53061217832916, "learning_rate": 8.071456486833729e-07, "loss": 0.7252, "step": 5717 }, { "epoch": 0.88, "grad_norm": 2.7445462678102412, "learning_rate": 8.051955329820849e-07, "loss": 0.8299, "step": 5718 }, { "epoch": 0.88, "grad_norm": 2.678869645619878, "learning_rate": 8.032476771058572e-07, "loss": 0.779, "step": 5719 }, { "epoch": 0.88, "grad_norm": 2.597783234585979, "learning_rate": 8.013020815334182e-07, "loss": 0.7771, "step": 5720 }, { "epoch": 0.88, "grad_norm": 2.5709764296981987, "learning_rate": 7.993587467429387e-07, "loss": 0.6913, "step": 5721 }, { "epoch": 0.88, "grad_norm": 2.8878615808643717, "learning_rate": 7.974176732120386e-07, "loss": 0.8386, "step": 5722 }, { "epoch": 0.88, "grad_norm": 2.4739802793848904, "learning_rate": 7.954788614177789e-07, "loss": 0.7136, "step": 5723 }, { "epoch": 0.88, "grad_norm": 2.5254962861324204, "learning_rate": 7.935423118366625e-07, "loss": 0.7467, "step": 5724 }, { "epoch": 0.88, "grad_norm": 2.59825821177977, "learning_rate": 7.916080249446434e-07, "loss": 0.749, "step": 5725 }, { "epoch": 0.88, "grad_norm": 2.6911630767002617, "learning_rate": 7.896760012171101e-07, "loss": 0.8131, "step": 5726 }, { "epoch": 0.88, "grad_norm": 2.7269735173965386, "learning_rate": 7.87746241128905e-07, "loss": 0.8272, "step": 5727 }, { "epoch": 0.88, "grad_norm": 2.655547149168662, "learning_rate": 7.858187451543064e-07, "loss": 0.7408, "step": 5728 }, { "epoch": 0.88, "grad_norm": 2.7887679312481746, "learning_rate": 7.838935137670378e-07, "loss": 0.8403, "step": 5729 }, { "epoch": 0.88, "grad_norm": 2.8238335497241875, "learning_rate": 7.819705474402695e-07, "loss": 0.8438, "step": 5730 }, { "epoch": 0.88, "grad_norm": 2.7864697800450675, "learning_rate": 7.800498466466099e-07, "loss": 0.7503, "step": 5731 }, { "epoch": 0.88, "grad_norm": 2.9423775361485385, "learning_rate": 7.781314118581141e-07, "loss": 0.6889, "step": 5732 }, { "epoch": 0.88, "grad_norm": 2.6177300541736197, "learning_rate": 7.762152435462821e-07, "loss": 0.7418, "step": 5733 }, { "epoch": 0.88, "grad_norm": 2.5682224575904224, "learning_rate": 7.743013421820522e-07, "loss": 0.7338, "step": 5734 }, { "epoch": 0.88, "grad_norm": 2.730093806909977, "learning_rate": 7.723897082358067e-07, "loss": 0.7525, "step": 5735 }, { "epoch": 0.88, "grad_norm": 2.773570026247756, "learning_rate": 7.704803421773743e-07, "loss": 0.8018, "step": 5736 }, { "epoch": 0.88, "grad_norm": 2.8167438145337145, "learning_rate": 7.685732444760197e-07, "loss": 0.749, "step": 5737 }, { "epoch": 0.88, "grad_norm": 2.8460419034294295, "learning_rate": 7.666684156004589e-07, "loss": 0.7815, "step": 5738 }, { "epoch": 0.88, "grad_norm": 2.581997146323194, "learning_rate": 7.647658560188431e-07, "loss": 0.6945, "step": 5739 }, { "epoch": 0.88, "grad_norm": 2.6421514833054456, "learning_rate": 7.628655661987661e-07, "loss": 0.8141, "step": 5740 }, { "epoch": 0.88, "grad_norm": 2.500537719209546, "learning_rate": 7.609675466072719e-07, "loss": 0.7023, "step": 5741 }, { "epoch": 0.88, "grad_norm": 2.4936945926955283, "learning_rate": 7.590717977108342e-07, "loss": 0.7819, "step": 5742 }, { "epoch": 0.88, "grad_norm": 2.59145708475709, "learning_rate": 7.571783199753746e-07, "loss": 0.7877, "step": 5743 }, { "epoch": 0.88, "grad_norm": 2.8791756410563267, "learning_rate": 7.552871138662621e-07, "loss": 0.8033, "step": 5744 }, { "epoch": 0.88, "grad_norm": 2.410861373680256, "learning_rate": 7.53398179848297e-07, "loss": 0.6554, "step": 5745 }, { "epoch": 0.88, "grad_norm": 2.6788493966132716, "learning_rate": 7.515115183857302e-07, "loss": 0.8059, "step": 5746 }, { "epoch": 0.88, "grad_norm": 2.509424351655514, "learning_rate": 7.496271299422498e-07, "loss": 0.6685, "step": 5747 }, { "epoch": 0.88, "grad_norm": 2.487752174672153, "learning_rate": 7.477450149809818e-07, "loss": 0.6712, "step": 5748 }, { "epoch": 0.88, "grad_norm": 2.8476622128192064, "learning_rate": 7.458651739645017e-07, "loss": 0.8459, "step": 5749 }, { "epoch": 0.88, "grad_norm": 2.559185180461762, "learning_rate": 7.439876073548192e-07, "loss": 0.6805, "step": 5750 }, { "epoch": 0.88, "grad_norm": 2.459377863705062, "learning_rate": 7.421123156133869e-07, "loss": 0.7042, "step": 5751 }, { "epoch": 0.88, "grad_norm": 2.4854631564887613, "learning_rate": 7.402392992011032e-07, "loss": 0.7098, "step": 5752 }, { "epoch": 0.88, "grad_norm": 2.772999848604892, "learning_rate": 7.383685585782985e-07, "loss": 0.8132, "step": 5753 }, { "epoch": 0.88, "grad_norm": 3.296150380444605, "learning_rate": 7.365000942047506e-07, "loss": 0.8222, "step": 5754 }, { "epoch": 0.88, "grad_norm": 3.083231204527326, "learning_rate": 7.346339065396746e-07, "loss": 0.8012, "step": 5755 }, { "epoch": 0.88, "grad_norm": 2.4491012331318753, "learning_rate": 7.327699960417256e-07, "loss": 0.6899, "step": 5756 }, { "epoch": 0.88, "grad_norm": 2.7249084898062574, "learning_rate": 7.30908363169005e-07, "loss": 0.6925, "step": 5757 }, { "epoch": 0.88, "grad_norm": 2.6967437072752967, "learning_rate": 7.290490083790458e-07, "loss": 0.7866, "step": 5758 }, { "epoch": 0.88, "grad_norm": 2.556881939811135, "learning_rate": 7.271919321288268e-07, "loss": 0.7139, "step": 5759 }, { "epoch": 0.88, "grad_norm": 2.815172538512507, "learning_rate": 7.253371348747662e-07, "loss": 0.8048, "step": 5760 }, { "epoch": 0.88, "grad_norm": 2.638440578512759, "learning_rate": 7.234846170727194e-07, "loss": 0.7051, "step": 5761 }, { "epoch": 0.88, "grad_norm": 2.6966172220810654, "learning_rate": 7.216343791779834e-07, "loss": 0.7231, "step": 5762 }, { "epoch": 0.88, "grad_norm": 2.5534587002684463, "learning_rate": 7.197864216452965e-07, "loss": 0.7466, "step": 5763 }, { "epoch": 0.88, "grad_norm": 2.7849363578109165, "learning_rate": 7.179407449288344e-07, "loss": 0.7409, "step": 5764 }, { "epoch": 0.88, "grad_norm": 2.544837759440299, "learning_rate": 7.160973494822121e-07, "loss": 0.6465, "step": 5765 }, { "epoch": 0.88, "grad_norm": 2.7231069748329695, "learning_rate": 7.142562357584836e-07, "loss": 0.7866, "step": 5766 }, { "epoch": 0.88, "grad_norm": 2.546310544712134, "learning_rate": 7.124174042101428e-07, "loss": 0.7346, "step": 5767 }, { "epoch": 0.88, "grad_norm": 3.0119781270597197, "learning_rate": 7.105808552891258e-07, "loss": 0.8738, "step": 5768 }, { "epoch": 0.88, "grad_norm": 2.588257545636272, "learning_rate": 7.087465894468037e-07, "loss": 0.7377, "step": 5769 }, { "epoch": 0.88, "grad_norm": 2.6439528897738858, "learning_rate": 7.069146071339839e-07, "loss": 0.8051, "step": 5770 }, { "epoch": 0.88, "grad_norm": 2.7572845518844895, "learning_rate": 7.050849088009216e-07, "loss": 0.6755, "step": 5771 }, { "epoch": 0.88, "grad_norm": 3.5912309063614627, "learning_rate": 7.032574948973037e-07, "loss": 0.7551, "step": 5772 }, { "epoch": 0.88, "grad_norm": 2.710962518663575, "learning_rate": 7.014323658722544e-07, "loss": 0.7734, "step": 5773 }, { "epoch": 0.88, "grad_norm": 2.6644565985000748, "learning_rate": 6.996095221743426e-07, "loss": 0.7861, "step": 5774 }, { "epoch": 0.88, "grad_norm": 2.7841204191393847, "learning_rate": 6.977889642515711e-07, "loss": 0.8666, "step": 5775 }, { "epoch": 0.88, "grad_norm": 2.4809075380037533, "learning_rate": 6.959706925513832e-07, "loss": 0.6904, "step": 5776 }, { "epoch": 0.88, "grad_norm": 2.6378329404590213, "learning_rate": 6.941547075206567e-07, "loss": 0.7582, "step": 5777 }, { "epoch": 0.88, "grad_norm": 2.4058948024512397, "learning_rate": 6.923410096057093e-07, "loss": 0.7166, "step": 5778 }, { "epoch": 0.88, "grad_norm": 2.7027364478522724, "learning_rate": 6.905295992522998e-07, "loss": 0.6934, "step": 5779 }, { "epoch": 0.88, "grad_norm": 2.8777912482750647, "learning_rate": 6.887204769056221e-07, "loss": 0.8297, "step": 5780 }, { "epoch": 0.88, "grad_norm": 3.0010290591989044, "learning_rate": 6.86913643010304e-07, "loss": 0.8016, "step": 5781 }, { "epoch": 0.89, "grad_norm": 2.8797253529185736, "learning_rate": 6.851090980104191e-07, "loss": 0.7898, "step": 5782 }, { "epoch": 0.89, "grad_norm": 2.8338261112987286, "learning_rate": 6.833068423494727e-07, "loss": 0.8343, "step": 5783 }, { "epoch": 0.89, "grad_norm": 2.6323910473659247, "learning_rate": 6.815068764704047e-07, "loss": 0.8172, "step": 5784 }, { "epoch": 0.89, "grad_norm": 2.596841816940055, "learning_rate": 6.797092008156026e-07, "loss": 0.7403, "step": 5785 }, { "epoch": 0.89, "grad_norm": 2.738457357878896, "learning_rate": 6.779138158268806e-07, "loss": 0.6369, "step": 5786 }, { "epoch": 0.89, "grad_norm": 2.915690322939979, "learning_rate": 6.761207219454957e-07, "loss": 0.8547, "step": 5787 }, { "epoch": 0.89, "grad_norm": 2.537974599329801, "learning_rate": 6.743299196121389e-07, "loss": 0.7582, "step": 5788 }, { "epoch": 0.89, "grad_norm": 2.707714310242144, "learning_rate": 6.725414092669391e-07, "loss": 0.7383, "step": 5789 }, { "epoch": 0.89, "grad_norm": 2.5190834121324293, "learning_rate": 6.707551913494626e-07, "loss": 0.7306, "step": 5790 }, { "epoch": 0.89, "grad_norm": 2.8560257151264334, "learning_rate": 6.689712662987124e-07, "loss": 0.7703, "step": 5791 }, { "epoch": 0.89, "grad_norm": 2.6256493925464923, "learning_rate": 6.671896345531248e-07, "loss": 0.7432, "step": 5792 }, { "epoch": 0.89, "grad_norm": 2.4196845841221815, "learning_rate": 6.654102965505782e-07, "loss": 0.6261, "step": 5793 }, { "epoch": 0.89, "grad_norm": 2.6043213927859536, "learning_rate": 6.636332527283817e-07, "loss": 0.7592, "step": 5794 }, { "epoch": 0.89, "grad_norm": 2.3460123501037726, "learning_rate": 6.618585035232828e-07, "loss": 0.6926, "step": 5795 }, { "epoch": 0.89, "grad_norm": 2.5889018853522097, "learning_rate": 6.600860493714667e-07, "loss": 0.7117, "step": 5796 }, { "epoch": 0.89, "grad_norm": 2.515917583771837, "learning_rate": 6.583158907085518e-07, "loss": 0.6722, "step": 5797 }, { "epoch": 0.89, "grad_norm": 2.7197011832215168, "learning_rate": 6.565480279695946e-07, "loss": 0.7158, "step": 5798 }, { "epoch": 0.89, "grad_norm": 3.229504657309361, "learning_rate": 6.547824615890841e-07, "loss": 0.7511, "step": 5799 }, { "epoch": 0.89, "grad_norm": 2.5513386297995084, "learning_rate": 6.530191920009465e-07, "loss": 0.7745, "step": 5800 }, { "epoch": 0.89, "grad_norm": 2.616216804463169, "learning_rate": 6.512582196385475e-07, "loss": 0.7391, "step": 5801 }, { "epoch": 0.89, "grad_norm": 2.9565292094630533, "learning_rate": 6.49499544934683e-07, "loss": 0.7664, "step": 5802 }, { "epoch": 0.89, "grad_norm": 2.626493868178572, "learning_rate": 6.477431683215841e-07, "loss": 0.7836, "step": 5803 }, { "epoch": 0.89, "grad_norm": 2.5789068803703685, "learning_rate": 6.459890902309218e-07, "loss": 0.6654, "step": 5804 }, { "epoch": 0.89, "grad_norm": 2.8827734647635666, "learning_rate": 6.442373110937994e-07, "loss": 0.6883, "step": 5805 }, { "epoch": 0.89, "grad_norm": 2.7036197267519055, "learning_rate": 6.424878313407501e-07, "loss": 0.6766, "step": 5806 }, { "epoch": 0.89, "grad_norm": 2.4623104650551375, "learning_rate": 6.407406514017534e-07, "loss": 0.6671, "step": 5807 }, { "epoch": 0.89, "grad_norm": 2.441650887017995, "learning_rate": 6.389957717062145e-07, "loss": 0.5803, "step": 5808 }, { "epoch": 0.89, "grad_norm": 2.72535880083023, "learning_rate": 6.37253192682975e-07, "loss": 0.7387, "step": 5809 }, { "epoch": 0.89, "grad_norm": 2.531639961170924, "learning_rate": 6.355129147603134e-07, "loss": 0.7407, "step": 5810 }, { "epoch": 0.89, "grad_norm": 3.1542566998202872, "learning_rate": 6.337749383659386e-07, "loss": 0.8718, "step": 5811 }, { "epoch": 0.89, "grad_norm": 2.5198596314597026, "learning_rate": 6.32039263926999e-07, "loss": 0.694, "step": 5812 }, { "epoch": 0.89, "grad_norm": 2.5182991329606703, "learning_rate": 6.303058918700744e-07, "loss": 0.7802, "step": 5813 }, { "epoch": 0.89, "grad_norm": 2.603245498148873, "learning_rate": 6.285748226211774e-07, "loss": 0.7041, "step": 5814 }, { "epoch": 0.89, "grad_norm": 2.7378847104911825, "learning_rate": 6.268460566057599e-07, "loss": 0.7567, "step": 5815 }, { "epoch": 0.89, "grad_norm": 2.539181928795849, "learning_rate": 6.251195942487009e-07, "loss": 0.7489, "step": 5816 }, { "epoch": 0.89, "grad_norm": 2.6082265092629866, "learning_rate": 6.233954359743155e-07, "loss": 0.8378, "step": 5817 }, { "epoch": 0.89, "grad_norm": 2.479348939551202, "learning_rate": 6.216735822063569e-07, "loss": 0.7271, "step": 5818 }, { "epoch": 0.89, "grad_norm": 2.6870083550884587, "learning_rate": 6.199540333680065e-07, "loss": 0.7784, "step": 5819 }, { "epoch": 0.89, "grad_norm": 2.7374816629494965, "learning_rate": 6.18236789881882e-07, "loss": 0.796, "step": 5820 }, { "epoch": 0.89, "grad_norm": 3.599437310217699, "learning_rate": 6.165218521700333e-07, "loss": 0.8368, "step": 5821 }, { "epoch": 0.89, "grad_norm": 2.6962671130641436, "learning_rate": 6.148092206539425e-07, "loss": 0.6851, "step": 5822 }, { "epoch": 0.89, "grad_norm": 2.546788351836191, "learning_rate": 6.130988957545281e-07, "loss": 0.7044, "step": 5823 }, { "epoch": 0.89, "grad_norm": 2.4294266150794495, "learning_rate": 6.113908778921407e-07, "loss": 0.7229, "step": 5824 }, { "epoch": 0.89, "grad_norm": 2.5925856274482286, "learning_rate": 6.0968516748656e-07, "loss": 0.7794, "step": 5825 }, { "epoch": 0.89, "grad_norm": 2.8770495316561764, "learning_rate": 6.079817649570052e-07, "loss": 0.7082, "step": 5826 }, { "epoch": 0.89, "grad_norm": 2.799481628204912, "learning_rate": 6.062806707221236e-07, "loss": 0.769, "step": 5827 }, { "epoch": 0.89, "grad_norm": 2.7666018489574338, "learning_rate": 6.045818851999952e-07, "loss": 0.7609, "step": 5828 }, { "epoch": 0.89, "grad_norm": 2.753622059951933, "learning_rate": 6.028854088081359e-07, "loss": 0.7736, "step": 5829 }, { "epoch": 0.89, "grad_norm": 2.5700093823746983, "learning_rate": 6.011912419634924e-07, "loss": 0.6826, "step": 5830 }, { "epoch": 0.89, "grad_norm": 2.8027455356023525, "learning_rate": 5.994993850824415e-07, "loss": 0.8633, "step": 5831 }, { "epoch": 0.89, "grad_norm": 3.0498463017469133, "learning_rate": 5.97809838580794e-07, "loss": 0.7948, "step": 5832 }, { "epoch": 0.89, "grad_norm": 2.787764168001535, "learning_rate": 5.961226028737932e-07, "loss": 0.7494, "step": 5833 }, { "epoch": 0.89, "grad_norm": 3.0044287571788915, "learning_rate": 5.944376783761164e-07, "loss": 0.7201, "step": 5834 }, { "epoch": 0.89, "grad_norm": 2.6799421290682717, "learning_rate": 5.927550655018699e-07, "loss": 0.7481, "step": 5835 }, { "epoch": 0.89, "grad_norm": 2.7264929525021024, "learning_rate": 5.910747646645898e-07, "loss": 0.8224, "step": 5836 }, { "epoch": 0.89, "grad_norm": 2.545838928841274, "learning_rate": 5.893967762772512e-07, "loss": 0.7329, "step": 5837 }, { "epoch": 0.89, "grad_norm": 2.6500564552598376, "learning_rate": 5.877211007522555e-07, "loss": 0.7454, "step": 5838 }, { "epoch": 0.89, "grad_norm": 2.65799984455843, "learning_rate": 5.86047738501433e-07, "loss": 0.7239, "step": 5839 }, { "epoch": 0.89, "grad_norm": 2.6193677259589876, "learning_rate": 5.843766899360547e-07, "loss": 0.8434, "step": 5840 }, { "epoch": 0.89, "grad_norm": 2.3599943054612464, "learning_rate": 5.827079554668147e-07, "loss": 0.7064, "step": 5841 }, { "epoch": 0.89, "grad_norm": 2.5828320727422587, "learning_rate": 5.810415355038413e-07, "loss": 0.7733, "step": 5842 }, { "epoch": 0.89, "grad_norm": 2.954381968066485, "learning_rate": 5.793774304566946e-07, "loss": 0.8062, "step": 5843 }, { "epoch": 0.89, "grad_norm": 2.5084137036333116, "learning_rate": 5.777156407343621e-07, "loss": 0.8645, "step": 5844 }, { "epoch": 0.89, "grad_norm": 2.6051884694422123, "learning_rate": 5.76056166745268e-07, "loss": 0.8177, "step": 5845 }, { "epoch": 0.89, "grad_norm": 2.6013475332208693, "learning_rate": 5.74399008897265e-07, "loss": 0.7638, "step": 5846 }, { "epoch": 0.89, "grad_norm": 2.5731747532030598, "learning_rate": 5.72744167597632e-07, "loss": 0.7762, "step": 5847 }, { "epoch": 0.9, "grad_norm": 2.6559945677786647, "learning_rate": 5.710916432530877e-07, "loss": 0.7291, "step": 5848 }, { "epoch": 0.9, "grad_norm": 2.575724733370775, "learning_rate": 5.694414362697742e-07, "loss": 0.8015, "step": 5849 }, { "epoch": 0.9, "grad_norm": 2.6141726668382512, "learning_rate": 5.677935470532636e-07, "loss": 0.7739, "step": 5850 }, { "epoch": 0.9, "grad_norm": 2.869045509165927, "learning_rate": 5.661479760085642e-07, "loss": 0.8365, "step": 5851 }, { "epoch": 0.9, "grad_norm": 2.579470374575014, "learning_rate": 5.645047235401091e-07, "loss": 0.6617, "step": 5852 }, { "epoch": 0.9, "grad_norm": 2.665435683114741, "learning_rate": 5.628637900517652e-07, "loss": 0.7943, "step": 5853 }, { "epoch": 0.9, "grad_norm": 2.737771133863642, "learning_rate": 5.612251759468301e-07, "loss": 0.7013, "step": 5854 }, { "epoch": 0.9, "grad_norm": 2.6553279117918236, "learning_rate": 5.595888816280226e-07, "loss": 0.8019, "step": 5855 }, { "epoch": 0.9, "grad_norm": 2.523369735721981, "learning_rate": 5.579549074975032e-07, "loss": 0.6509, "step": 5856 }, { "epoch": 0.9, "grad_norm": 5.0010206491405205, "learning_rate": 5.563232539568553e-07, "loss": 0.8583, "step": 5857 }, { "epoch": 0.9, "grad_norm": 2.5704308172659847, "learning_rate": 5.546939214070923e-07, "loss": 0.7319, "step": 5858 }, { "epoch": 0.9, "grad_norm": 2.9387864037827605, "learning_rate": 5.530669102486619e-07, "loss": 0.8983, "step": 5859 }, { "epoch": 0.9, "grad_norm": 2.934935762272051, "learning_rate": 5.514422208814352e-07, "loss": 0.6828, "step": 5860 }, { "epoch": 0.9, "grad_norm": 2.334253343305994, "learning_rate": 5.49819853704715e-07, "loss": 0.7281, "step": 5861 }, { "epoch": 0.9, "grad_norm": 2.8362699742929722, "learning_rate": 5.481998091172358e-07, "loss": 0.7761, "step": 5862 }, { "epoch": 0.9, "grad_norm": 2.5752370173710646, "learning_rate": 5.465820875171557e-07, "loss": 0.7658, "step": 5863 }, { "epoch": 0.9, "grad_norm": 3.7342653154702, "learning_rate": 5.4496668930207e-07, "loss": 0.818, "step": 5864 }, { "epoch": 0.9, "grad_norm": 2.625860994123965, "learning_rate": 5.433536148689944e-07, "loss": 0.7583, "step": 5865 }, { "epoch": 0.9, "grad_norm": 2.774371794317327, "learning_rate": 5.417428646143797e-07, "loss": 0.773, "step": 5866 }, { "epoch": 0.9, "grad_norm": 2.7174080154169933, "learning_rate": 5.401344389341013e-07, "loss": 0.7942, "step": 5867 }, { "epoch": 0.9, "grad_norm": 2.928065420008455, "learning_rate": 5.385283382234674e-07, "loss": 0.7364, "step": 5868 }, { "epoch": 0.9, "grad_norm": 3.071811201597634, "learning_rate": 5.369245628772079e-07, "loss": 0.72, "step": 5869 }, { "epoch": 0.9, "grad_norm": 2.608659449286244, "learning_rate": 5.35323113289491e-07, "loss": 0.6901, "step": 5870 }, { "epoch": 0.9, "grad_norm": 2.684645889963231, "learning_rate": 5.337239898539071e-07, "loss": 0.7628, "step": 5871 }, { "epoch": 0.9, "grad_norm": 2.6352813074432464, "learning_rate": 5.321271929634719e-07, "loss": 0.8173, "step": 5872 }, { "epoch": 0.9, "grad_norm": 2.590851206610053, "learning_rate": 5.305327230106383e-07, "loss": 0.6468, "step": 5873 }, { "epoch": 0.9, "grad_norm": 2.752933732434142, "learning_rate": 5.289405803872782e-07, "loss": 0.7054, "step": 5874 }, { "epoch": 0.9, "grad_norm": 3.2451802406056287, "learning_rate": 5.273507654846999e-07, "loss": 0.7577, "step": 5875 }, { "epoch": 0.9, "grad_norm": 2.853764991107126, "learning_rate": 5.257632786936328e-07, "loss": 0.7259, "step": 5876 }, { "epoch": 0.9, "grad_norm": 2.4914983750520885, "learning_rate": 5.241781204042362e-07, "loss": 0.6737, "step": 5877 }, { "epoch": 0.9, "grad_norm": 3.05209228111768, "learning_rate": 5.225952910060994e-07, "loss": 0.7027, "step": 5878 }, { "epoch": 0.9, "grad_norm": 2.621867794165036, "learning_rate": 5.210147908882357e-07, "loss": 0.7497, "step": 5879 }, { "epoch": 0.9, "grad_norm": 2.922865468524496, "learning_rate": 5.194366204390867e-07, "loss": 0.8475, "step": 5880 }, { "epoch": 0.9, "grad_norm": 2.5874957343095515, "learning_rate": 5.178607800465252e-07, "loss": 0.6427, "step": 5881 }, { "epoch": 0.9, "grad_norm": 2.825060543684903, "learning_rate": 5.162872700978483e-07, "loss": 0.762, "step": 5882 }, { "epoch": 0.9, "grad_norm": 2.9238862189206114, "learning_rate": 5.147160909797777e-07, "loss": 0.8199, "step": 5883 }, { "epoch": 0.9, "grad_norm": 3.1311383501295964, "learning_rate": 5.13147243078469e-07, "loss": 0.8092, "step": 5884 }, { "epoch": 0.9, "grad_norm": 2.8516286192122204, "learning_rate": 5.11580726779497e-07, "loss": 0.7055, "step": 5885 }, { "epoch": 0.9, "grad_norm": 2.597901538018006, "learning_rate": 5.100165424678715e-07, "loss": 0.7527, "step": 5886 }, { "epoch": 0.9, "grad_norm": 2.8139780948798947, "learning_rate": 5.08454690528023e-07, "loss": 0.7478, "step": 5887 }, { "epoch": 0.9, "grad_norm": 2.434325694543393, "learning_rate": 5.06895171343812e-07, "loss": 0.7304, "step": 5888 }, { "epoch": 0.9, "grad_norm": 2.4674361312600506, "learning_rate": 5.05337985298523e-07, "loss": 0.7271, "step": 5889 }, { "epoch": 0.9, "grad_norm": 2.642364749860154, "learning_rate": 5.037831327748699e-07, "loss": 0.7311, "step": 5890 }, { "epoch": 0.9, "grad_norm": 2.688287039097314, "learning_rate": 5.022306141549893e-07, "loss": 0.7586, "step": 5891 }, { "epoch": 0.9, "grad_norm": 2.4741994629105313, "learning_rate": 5.006804298204515e-07, "loss": 0.7872, "step": 5892 }, { "epoch": 0.9, "grad_norm": 4.854934089901339, "learning_rate": 4.991325801522429e-07, "loss": 0.6969, "step": 5893 }, { "epoch": 0.9, "grad_norm": 3.41087337300248, "learning_rate": 4.975870655307868e-07, "loss": 0.7949, "step": 5894 }, { "epoch": 0.9, "grad_norm": 2.953241328716546, "learning_rate": 4.96043886335924e-07, "loss": 0.7946, "step": 5895 }, { "epoch": 0.9, "grad_norm": 2.8049058507014983, "learning_rate": 4.945030429469244e-07, "loss": 0.7912, "step": 5896 }, { "epoch": 0.9, "grad_norm": 2.595414400806504, "learning_rate": 4.929645357424862e-07, "loss": 0.7498, "step": 5897 }, { "epoch": 0.9, "grad_norm": 2.745635922213741, "learning_rate": 4.914283651007312e-07, "loss": 0.7651, "step": 5898 }, { "epoch": 0.9, "grad_norm": 2.982896718226123, "learning_rate": 4.898945313992054e-07, "loss": 0.7171, "step": 5899 }, { "epoch": 0.9, "grad_norm": 2.6219179763701024, "learning_rate": 4.883630350148827e-07, "loss": 0.737, "step": 5900 }, { "epoch": 0.9, "grad_norm": 2.602707647768999, "learning_rate": 4.868338763241631e-07, "loss": 0.7237, "step": 5901 }, { "epoch": 0.9, "grad_norm": 2.6003569589861706, "learning_rate": 4.853070557028672e-07, "loss": 0.7409, "step": 5902 }, { "epoch": 0.9, "grad_norm": 2.5458129925277726, "learning_rate": 4.837825735262503e-07, "loss": 0.7459, "step": 5903 }, { "epoch": 0.9, "grad_norm": 2.758220418720208, "learning_rate": 4.822604301689826e-07, "loss": 0.8263, "step": 5904 }, { "epoch": 0.9, "grad_norm": 2.79996513102502, "learning_rate": 4.807406260051672e-07, "loss": 0.848, "step": 5905 }, { "epoch": 0.9, "grad_norm": 2.7388566308633995, "learning_rate": 4.792231614083287e-07, "loss": 0.7703, "step": 5906 }, { "epoch": 0.9, "grad_norm": 2.704544888648876, "learning_rate": 4.777080367514153e-07, "loss": 0.8292, "step": 5907 }, { "epoch": 0.9, "grad_norm": 2.7338348905418997, "learning_rate": 4.7619525240680475e-07, "loss": 0.7844, "step": 5908 }, { "epoch": 0.9, "grad_norm": 2.545428147432849, "learning_rate": 4.746848087462963e-07, "loss": 0.7442, "step": 5909 }, { "epoch": 0.9, "grad_norm": 3.751373583389988, "learning_rate": 4.731767061411141e-07, "loss": 0.7968, "step": 5910 }, { "epoch": 0.9, "grad_norm": 2.632331818799675, "learning_rate": 4.716709449619084e-07, "loss": 0.8126, "step": 5911 }, { "epoch": 0.9, "grad_norm": 2.397912522997727, "learning_rate": 4.701675255787519e-07, "loss": 0.7333, "step": 5912 }, { "epoch": 0.91, "grad_norm": 2.8191668230435645, "learning_rate": 4.686664483611425e-07, "loss": 0.6911, "step": 5913 }, { "epoch": 0.91, "grad_norm": 2.5749939501751276, "learning_rate": 4.6716771367800507e-07, "loss": 0.6782, "step": 5914 }, { "epoch": 0.91, "grad_norm": 2.6443502932254215, "learning_rate": 4.656713218976838e-07, "loss": 0.6271, "step": 5915 }, { "epoch": 0.91, "grad_norm": 3.0452845496714342, "learning_rate": 4.641772733879535e-07, "loss": 0.8101, "step": 5916 }, { "epoch": 0.91, "grad_norm": 3.292735176357161, "learning_rate": 4.626855685160059e-07, "loss": 0.8173, "step": 5917 }, { "epoch": 0.91, "grad_norm": 2.5951150490455013, "learning_rate": 4.611962076484611e-07, "loss": 0.7186, "step": 5918 }, { "epoch": 0.91, "grad_norm": 3.0111442088519453, "learning_rate": 4.5970919115136406e-07, "loss": 0.7697, "step": 5919 }, { "epoch": 0.91, "grad_norm": 2.5973671680062185, "learning_rate": 4.582245193901802e-07, "loss": 0.7933, "step": 5920 }, { "epoch": 0.91, "grad_norm": 2.475572384849162, "learning_rate": 4.567421927297999e-07, "loss": 0.6779, "step": 5921 }, { "epoch": 0.91, "grad_norm": 2.4497504442552986, "learning_rate": 4.5526221153453845e-07, "loss": 0.7951, "step": 5922 }, { "epoch": 0.91, "grad_norm": 3.2379051155154412, "learning_rate": 4.5378457616813255e-07, "loss": 0.8106, "step": 5923 }, { "epoch": 0.91, "grad_norm": 3.0168271076344393, "learning_rate": 4.52309286993744e-07, "loss": 0.8453, "step": 5924 }, { "epoch": 0.91, "grad_norm": 2.5571350055712943, "learning_rate": 4.508363443739583e-07, "loss": 0.7459, "step": 5925 }, { "epoch": 0.91, "grad_norm": 2.422313233053986, "learning_rate": 4.493657486707814e-07, "loss": 0.8195, "step": 5926 }, { "epoch": 0.91, "grad_norm": 2.6614041261222745, "learning_rate": 4.478975002456465e-07, "loss": 0.8475, "step": 5927 }, { "epoch": 0.91, "grad_norm": 2.5459306588993655, "learning_rate": 4.4643159945940816e-07, "loss": 0.7793, "step": 5928 }, { "epoch": 0.91, "grad_norm": 2.638010072274686, "learning_rate": 4.449680466723416e-07, "loss": 0.7412, "step": 5929 }, { "epoch": 0.91, "grad_norm": 2.6374186218730866, "learning_rate": 4.435068422441491e-07, "loss": 0.7863, "step": 5930 }, { "epoch": 0.91, "grad_norm": 3.0473504441514527, "learning_rate": 4.4204798653395334e-07, "loss": 0.724, "step": 5931 }, { "epoch": 0.91, "grad_norm": 2.880335924375563, "learning_rate": 4.405914799002997e-07, "loss": 0.8654, "step": 5932 }, { "epoch": 0.91, "grad_norm": 2.5988830192409593, "learning_rate": 4.391373227011564e-07, "loss": 0.8395, "step": 5933 }, { "epoch": 0.91, "grad_norm": 2.8005912873580217, "learning_rate": 4.376855152939152e-07, "loss": 0.8489, "step": 5934 }, { "epoch": 0.91, "grad_norm": 2.5327179535187696, "learning_rate": 4.362360580353875e-07, "loss": 0.7765, "step": 5935 }, { "epoch": 0.91, "grad_norm": 3.110555066156692, "learning_rate": 4.347889512818115e-07, "loss": 0.7865, "step": 5936 }, { "epoch": 0.91, "grad_norm": 2.425327650857889, "learning_rate": 4.33344195388844e-07, "loss": 0.7973, "step": 5937 }, { "epoch": 0.91, "grad_norm": 2.484576397445717, "learning_rate": 4.319017907115686e-07, "loss": 0.6719, "step": 5938 }, { "epoch": 0.91, "grad_norm": 2.7141696380110596, "learning_rate": 4.3046173760448507e-07, "loss": 0.7373, "step": 5939 }, { "epoch": 0.91, "grad_norm": 2.6212725554013523, "learning_rate": 4.2902403642151704e-07, "loss": 0.6919, "step": 5940 }, { "epoch": 0.91, "grad_norm": 2.622753613256637, "learning_rate": 4.27588687516014e-07, "loss": 0.7129, "step": 5941 }, { "epoch": 0.91, "grad_norm": 2.7304264084363306, "learning_rate": 4.2615569124074385e-07, "loss": 0.7973, "step": 5942 }, { "epoch": 0.91, "grad_norm": 2.6718605017803423, "learning_rate": 4.2472504794789593e-07, "loss": 0.764, "step": 5943 }, { "epoch": 0.91, "grad_norm": 3.0322436728986784, "learning_rate": 4.232967579890823e-07, "loss": 0.8199, "step": 5944 }, { "epoch": 0.91, "grad_norm": 2.7072193959741755, "learning_rate": 4.2187082171533665e-07, "loss": 0.8041, "step": 5945 }, { "epoch": 0.91, "grad_norm": 2.8810435881483913, "learning_rate": 4.204472394771142e-07, "loss": 0.7923, "step": 5946 }, { "epoch": 0.91, "grad_norm": 2.8209460055158027, "learning_rate": 4.190260116242917e-07, "loss": 0.5896, "step": 5947 }, { "epoch": 0.91, "grad_norm": 2.612444919294644, "learning_rate": 4.176071385061664e-07, "loss": 0.7208, "step": 5948 }, { "epoch": 0.91, "grad_norm": 2.5928435503810485, "learning_rate": 4.1619062047145943e-07, "loss": 0.735, "step": 5949 }, { "epoch": 0.91, "grad_norm": 2.7103087781610578, "learning_rate": 4.1477645786831e-07, "loss": 0.7629, "step": 5950 }, { "epoch": 0.91, "grad_norm": 2.55167539583218, "learning_rate": 4.1336465104427793e-07, "loss": 0.6997, "step": 5951 }, { "epoch": 0.91, "grad_norm": 2.608092717551468, "learning_rate": 4.1195520034634896e-07, "loss": 0.7727, "step": 5952 }, { "epoch": 0.91, "grad_norm": 2.578863079954882, "learning_rate": 4.105481061209249e-07, "loss": 0.7717, "step": 5953 }, { "epoch": 0.91, "grad_norm": 2.6419007520560576, "learning_rate": 4.091433687138291e-07, "loss": 0.7056, "step": 5954 }, { "epoch": 0.91, "grad_norm": 2.674006217875925, "learning_rate": 4.0774098847030875e-07, "loss": 0.8579, "step": 5955 }, { "epoch": 0.91, "grad_norm": 2.6653185857072184, "learning_rate": 4.063409657350281e-07, "loss": 0.7532, "step": 5956 }, { "epoch": 0.91, "grad_norm": 2.695677170700721, "learning_rate": 4.0494330085207314e-07, "loss": 0.7022, "step": 5957 }, { "epoch": 0.91, "grad_norm": 2.7217815025546916, "learning_rate": 4.0354799416495227e-07, "loss": 0.7414, "step": 5958 }, { "epoch": 0.91, "grad_norm": 2.6230826008731913, "learning_rate": 4.0215504601659017e-07, "loss": 0.7257, "step": 5959 }, { "epoch": 0.91, "grad_norm": 3.201265139540046, "learning_rate": 4.007644567493374e-07, "loss": 0.8023, "step": 5960 }, { "epoch": 0.91, "grad_norm": 2.534269235714635, "learning_rate": 3.993762267049606e-07, "loss": 0.6844, "step": 5961 }, { "epoch": 0.91, "grad_norm": 2.6808601105724374, "learning_rate": 3.9799035622464674e-07, "loss": 0.6956, "step": 5962 }, { "epoch": 0.91, "grad_norm": 2.581383954042853, "learning_rate": 3.9660684564900574e-07, "loss": 0.7696, "step": 5963 }, { "epoch": 0.91, "grad_norm": 2.5849069154868634, "learning_rate": 3.9522569531806556e-07, "loss": 0.7603, "step": 5964 }, { "epoch": 0.91, "grad_norm": 2.768738229693881, "learning_rate": 3.9384690557127125e-07, "loss": 0.6432, "step": 5965 }, { "epoch": 0.91, "grad_norm": 2.8027221859258016, "learning_rate": 3.9247047674749625e-07, "loss": 0.7137, "step": 5966 }, { "epoch": 0.91, "grad_norm": 2.6574074923851816, "learning_rate": 3.9109640918502333e-07, "loss": 0.7667, "step": 5967 }, { "epoch": 0.91, "grad_norm": 2.7951563356356353, "learning_rate": 3.897247032215601e-07, "loss": 0.8096, "step": 5968 }, { "epoch": 0.91, "grad_norm": 2.5183737997202704, "learning_rate": 3.883553591942346e-07, "loss": 0.812, "step": 5969 }, { "epoch": 0.91, "grad_norm": 2.8079909667473477, "learning_rate": 3.8698837743959325e-07, "loss": 0.7284, "step": 5970 }, { "epoch": 0.91, "grad_norm": 2.68043983615785, "learning_rate": 3.8562375829360286e-07, "loss": 0.6751, "step": 5971 }, { "epoch": 0.91, "grad_norm": 2.7758583003898907, "learning_rate": 3.8426150209164624e-07, "loss": 0.7877, "step": 5972 }, { "epoch": 0.91, "grad_norm": 2.7937395372284466, "learning_rate": 3.8290160916852894e-07, "loss": 0.8075, "step": 5973 }, { "epoch": 0.91, "grad_norm": 2.688516227702249, "learning_rate": 3.8154407985847466e-07, "loss": 0.7854, "step": 5974 }, { "epoch": 0.91, "grad_norm": 2.628765653663377, "learning_rate": 3.8018891449512654e-07, "loss": 0.6539, "step": 5975 }, { "epoch": 0.91, "grad_norm": 2.7655961176352757, "learning_rate": 3.788361134115448e-07, "loss": 0.7628, "step": 5976 }, { "epoch": 0.91, "grad_norm": 2.6225969425848907, "learning_rate": 3.774856769402113e-07, "loss": 0.7296, "step": 5977 }, { "epoch": 0.92, "grad_norm": 2.691373260121956, "learning_rate": 3.7613760541302504e-07, "loss": 0.7735, "step": 5978 }, { "epoch": 0.92, "grad_norm": 2.5287047223034285, "learning_rate": 3.747918991613031e-07, "loss": 0.7892, "step": 5979 }, { "epoch": 0.92, "grad_norm": 2.56744426619113, "learning_rate": 3.734485585157843e-07, "loss": 0.7774, "step": 5980 }, { "epoch": 0.92, "grad_norm": 2.6214997737584773, "learning_rate": 3.7210758380662125e-07, "loss": 0.8006, "step": 5981 }, { "epoch": 0.92, "grad_norm": 3.8131875526793033, "learning_rate": 3.707689753633914e-07, "loss": 0.7807, "step": 5982 }, { "epoch": 0.92, "grad_norm": 2.8500936267310752, "learning_rate": 3.6943273351508604e-07, "loss": 0.7625, "step": 5983 }, { "epoch": 0.92, "grad_norm": 2.6465562873541058, "learning_rate": 3.680988585901124e-07, "loss": 0.7722, "step": 5984 }, { "epoch": 0.92, "grad_norm": 2.5574726673727084, "learning_rate": 3.66767350916305e-07, "loss": 0.8831, "step": 5985 }, { "epoch": 0.92, "grad_norm": 2.782848100485891, "learning_rate": 3.654382108209087e-07, "loss": 0.73, "step": 5986 }, { "epoch": 0.92, "grad_norm": 2.6344063959839676, "learning_rate": 3.6411143863058773e-07, "loss": 0.7311, "step": 5987 }, { "epoch": 0.92, "grad_norm": 2.6342752111204453, "learning_rate": 3.6278703467142684e-07, "loss": 0.7964, "step": 5988 }, { "epoch": 0.92, "grad_norm": 2.6008339547301254, "learning_rate": 3.6146499926892786e-07, "loss": 0.7708, "step": 5989 }, { "epoch": 0.92, "grad_norm": 2.7388841845665794, "learning_rate": 3.6014533274800867e-07, "loss": 0.7886, "step": 5990 }, { "epoch": 0.92, "grad_norm": 2.4069664083059545, "learning_rate": 3.588280354330065e-07, "loss": 0.6621, "step": 5991 }, { "epoch": 0.92, "grad_norm": 2.8839315931631804, "learning_rate": 3.5751310764767567e-07, "loss": 0.7736, "step": 5992 }, { "epoch": 0.92, "grad_norm": 2.733277195734112, "learning_rate": 3.56200549715191e-07, "loss": 0.8026, "step": 5993 }, { "epoch": 0.92, "grad_norm": 2.540055857905672, "learning_rate": 3.5489036195814007e-07, "loss": 0.6752, "step": 5994 }, { "epoch": 0.92, "grad_norm": 2.8417129277346667, "learning_rate": 3.535825446985297e-07, "loss": 0.7915, "step": 5995 }, { "epoch": 0.92, "grad_norm": 2.5360667168506597, "learning_rate": 3.522770982577872e-07, "loss": 0.7955, "step": 5996 }, { "epoch": 0.92, "grad_norm": 2.6673994000706913, "learning_rate": 3.5097402295675373e-07, "loss": 0.7334, "step": 5997 }, { "epoch": 0.92, "grad_norm": 2.472030136770235, "learning_rate": 3.496733191156876e-07, "loss": 0.7367, "step": 5998 }, { "epoch": 0.92, "grad_norm": 2.6281808987607693, "learning_rate": 3.483749870542663e-07, "loss": 0.811, "step": 5999 }, { "epoch": 0.92, "grad_norm": 3.084336288946746, "learning_rate": 3.470790270915836e-07, "loss": 0.791, "step": 6000 }, { "epoch": 0.92, "grad_norm": 2.69540151361276, "learning_rate": 3.4578543954615017e-07, "loss": 0.8372, "step": 6001 }, { "epoch": 0.92, "grad_norm": 2.6477593650807663, "learning_rate": 3.444942247358918e-07, "loss": 0.7617, "step": 6002 }, { "epoch": 0.92, "grad_norm": 2.6109865773332137, "learning_rate": 3.4320538297815454e-07, "loss": 0.8041, "step": 6003 }, { "epoch": 0.92, "grad_norm": 2.564118447432275, "learning_rate": 3.419189145896995e-07, "loss": 0.6351, "step": 6004 }, { "epoch": 0.92, "grad_norm": 2.7347820160761946, "learning_rate": 3.4063481988670375e-07, "loss": 0.85, "step": 6005 }, { "epoch": 0.92, "grad_norm": 2.6586665554457767, "learning_rate": 3.3935309918476155e-07, "loss": 0.7468, "step": 6006 }, { "epoch": 0.92, "grad_norm": 2.6617987820098747, "learning_rate": 3.3807375279888644e-07, "loss": 0.7237, "step": 6007 }, { "epoch": 0.92, "grad_norm": 2.6600573016886737, "learning_rate": 3.3679678104350353e-07, "loss": 0.7518, "step": 6008 }, { "epoch": 0.92, "grad_norm": 2.5286339031019405, "learning_rate": 3.355221842324552e-07, "loss": 0.7932, "step": 6009 }, { "epoch": 0.92, "grad_norm": 3.454421060746707, "learning_rate": 3.342499626790052e-07, "loss": 0.8156, "step": 6010 }, { "epoch": 0.92, "grad_norm": 2.7733675155642494, "learning_rate": 3.329801166958291e-07, "loss": 0.8301, "step": 6011 }, { "epoch": 0.92, "grad_norm": 3.1470563228210944, "learning_rate": 3.317126465950171e-07, "loss": 0.7118, "step": 6012 }, { "epoch": 0.92, "grad_norm": 2.7036527420458616, "learning_rate": 3.3044755268808013e-07, "loss": 0.7611, "step": 6013 }, { "epoch": 0.92, "grad_norm": 2.697254921913109, "learning_rate": 3.291848352859406e-07, "loss": 0.7057, "step": 6014 }, { "epoch": 0.92, "grad_norm": 2.5999225248707623, "learning_rate": 3.279244946989424e-07, "loss": 0.7057, "step": 6015 }, { "epoch": 0.92, "grad_norm": 2.8870135590746515, "learning_rate": 3.266665312368389e-07, "loss": 0.6807, "step": 6016 }, { "epoch": 0.92, "grad_norm": 2.7602796195978123, "learning_rate": 3.2541094520880166e-07, "loss": 0.8041, "step": 6017 }, { "epoch": 0.92, "grad_norm": 3.4683812993866825, "learning_rate": 3.241577369234228e-07, "loss": 0.8085, "step": 6018 }, { "epoch": 0.92, "grad_norm": 2.4856380251395604, "learning_rate": 3.229069066887014e-07, "loss": 0.7617, "step": 6019 }, { "epoch": 0.92, "grad_norm": 2.4488210107592954, "learning_rate": 3.2165845481205826e-07, "loss": 0.7211, "step": 6020 }, { "epoch": 0.92, "grad_norm": 2.4634891340354304, "learning_rate": 3.2041238160032793e-07, "loss": 0.6962, "step": 6021 }, { "epoch": 0.92, "grad_norm": 2.7593153098002845, "learning_rate": 3.191686873597599e-07, "loss": 0.7758, "step": 6022 }, { "epoch": 0.92, "grad_norm": 2.38830097389013, "learning_rate": 3.1792737239601965e-07, "loss": 0.6635, "step": 6023 }, { "epoch": 0.92, "grad_norm": 2.7072742284457427, "learning_rate": 3.166884370141876e-07, "loss": 0.7502, "step": 6024 }, { "epoch": 0.92, "grad_norm": 2.690475824319762, "learning_rate": 3.1545188151875795e-07, "loss": 0.7552, "step": 6025 }, { "epoch": 0.92, "grad_norm": 2.846891691572372, "learning_rate": 3.142177062136431e-07, "loss": 0.707, "step": 6026 }, { "epoch": 0.92, "grad_norm": 3.0359662518289827, "learning_rate": 3.1298591140216827e-07, "loss": 0.7993, "step": 6027 }, { "epoch": 0.92, "grad_norm": 2.746866925967777, "learning_rate": 3.117564973870735e-07, "loss": 0.875, "step": 6028 }, { "epoch": 0.92, "grad_norm": 2.5842931925443122, "learning_rate": 3.105294644705148e-07, "loss": 0.6753, "step": 6029 }, { "epoch": 0.92, "grad_norm": 2.7795678238783204, "learning_rate": 3.093048129540632e-07, "loss": 0.8573, "step": 6030 }, { "epoch": 0.92, "grad_norm": 2.6587604849304656, "learning_rate": 3.0808254313870133e-07, "loss": 0.7591, "step": 6031 }, { "epoch": 0.92, "grad_norm": 2.6921282826190547, "learning_rate": 3.068626553248311e-07, "loss": 0.8027, "step": 6032 }, { "epoch": 0.92, "grad_norm": 2.650607136409856, "learning_rate": 3.0564514981226703e-07, "loss": 0.7448, "step": 6033 }, { "epoch": 0.92, "grad_norm": 2.5959315484385255, "learning_rate": 3.0443002690023537e-07, "loss": 0.6463, "step": 6034 }, { "epoch": 0.92, "grad_norm": 2.4771076355337986, "learning_rate": 3.032172868873817e-07, "loss": 0.7658, "step": 6035 }, { "epoch": 0.92, "grad_norm": 2.560831138005353, "learning_rate": 3.0200693007176097e-07, "loss": 0.7482, "step": 6036 }, { "epoch": 0.92, "grad_norm": 2.6691916248453276, "learning_rate": 3.0079895675084734e-07, "loss": 0.7337, "step": 6037 }, { "epoch": 0.92, "grad_norm": 2.4902436658315352, "learning_rate": 2.995933672215257e-07, "loss": 0.7613, "step": 6038 }, { "epoch": 0.92, "grad_norm": 2.4597485662812892, "learning_rate": 2.983901617800955e-07, "loss": 0.6547, "step": 6039 }, { "epoch": 0.92, "grad_norm": 2.889902140838847, "learning_rate": 2.971893407222737e-07, "loss": 0.8037, "step": 6040 }, { "epoch": 0.92, "grad_norm": 2.8326105413501033, "learning_rate": 2.9599090434318523e-07, "loss": 0.7842, "step": 6041 }, { "epoch": 0.92, "grad_norm": 2.6873547730949623, "learning_rate": 2.947948529373723e-07, "loss": 0.791, "step": 6042 }, { "epoch": 0.92, "grad_norm": 2.812710976846079, "learning_rate": 2.9360118679879315e-07, "loss": 0.7425, "step": 6043 }, { "epoch": 0.93, "grad_norm": 2.720941580967908, "learning_rate": 2.9240990622081634e-07, "loss": 0.7981, "step": 6044 }, { "epoch": 0.93, "grad_norm": 3.1581410361795386, "learning_rate": 2.912210114962244e-07, "loss": 0.7823, "step": 6045 }, { "epoch": 0.93, "grad_norm": 2.656038443141141, "learning_rate": 2.900345029172158e-07, "loss": 0.6951, "step": 6046 }, { "epoch": 0.93, "grad_norm": 2.7266040236326963, "learning_rate": 2.888503807753984e-07, "loss": 0.7339, "step": 6047 }, { "epoch": 0.93, "grad_norm": 2.7799392585020386, "learning_rate": 2.8766864536179937e-07, "loss": 0.7675, "step": 6048 }, { "epoch": 0.93, "grad_norm": 2.737208483644531, "learning_rate": 2.8648929696685535e-07, "loss": 0.8165, "step": 6049 }, { "epoch": 0.93, "grad_norm": 2.842487845149167, "learning_rate": 2.853123358804144e-07, "loss": 0.7047, "step": 6050 }, { "epoch": 0.93, "grad_norm": 3.7143236985630113, "learning_rate": 2.8413776239174404e-07, "loss": 0.8682, "step": 6051 }, { "epoch": 0.93, "grad_norm": 2.556683398994706, "learning_rate": 2.82965576789519e-07, "loss": 0.6815, "step": 6052 }, { "epoch": 0.93, "grad_norm": 2.9004282615196924, "learning_rate": 2.81795779361832e-07, "loss": 0.6865, "step": 6053 }, { "epoch": 0.93, "grad_norm": 2.5260827904674867, "learning_rate": 2.806283703961854e-07, "loss": 0.8114, "step": 6054 }, { "epoch": 0.93, "grad_norm": 2.911421961080375, "learning_rate": 2.794633501794952e-07, "loss": 0.7076, "step": 6055 }, { "epoch": 0.93, "grad_norm": 2.5387998313691216, "learning_rate": 2.783007189980902e-07, "loss": 0.7606, "step": 6056 }, { "epoch": 0.93, "grad_norm": 2.45474959202561, "learning_rate": 2.77140477137714e-07, "loss": 0.7467, "step": 6057 }, { "epoch": 0.93, "grad_norm": 2.966599236143366, "learning_rate": 2.759826248835196e-07, "loss": 0.7403, "step": 6058 }, { "epoch": 0.93, "grad_norm": 2.4783423814613297, "learning_rate": 2.74827162520076e-07, "loss": 0.6726, "step": 6059 }, { "epoch": 0.93, "grad_norm": 2.638212372845662, "learning_rate": 2.7367409033136395e-07, "loss": 0.7919, "step": 6060 }, { "epoch": 0.93, "grad_norm": 2.565673526069858, "learning_rate": 2.725234086007744e-07, "loss": 0.8529, "step": 6061 }, { "epoch": 0.93, "grad_norm": 2.495147047946822, "learning_rate": 2.713751176111146e-07, "loss": 0.6873, "step": 6062 }, { "epoch": 0.93, "grad_norm": 2.6994013441494182, "learning_rate": 2.7022921764459977e-07, "loss": 0.6993, "step": 6063 }, { "epoch": 0.93, "grad_norm": 2.8122848045588733, "learning_rate": 2.6908570898286355e-07, "loss": 0.7469, "step": 6064 }, { "epoch": 0.93, "grad_norm": 2.593524412271433, "learning_rate": 2.679445919069457e-07, "loss": 0.7646, "step": 6065 }, { "epoch": 0.93, "grad_norm": 2.6157359363362693, "learning_rate": 2.668058666973017e-07, "loss": 0.6645, "step": 6066 }, { "epoch": 0.93, "grad_norm": 2.685971547296702, "learning_rate": 2.6566953363379777e-07, "loss": 0.7256, "step": 6067 }, { "epoch": 0.93, "grad_norm": 3.505877258203053, "learning_rate": 2.6453559299571276e-07, "loss": 0.8303, "step": 6068 }, { "epoch": 0.93, "grad_norm": 2.510661056965399, "learning_rate": 2.63404045061737e-07, "loss": 0.7109, "step": 6069 }, { "epoch": 0.93, "grad_norm": 2.650980374114757, "learning_rate": 2.622748901099759e-07, "loss": 0.6944, "step": 6070 }, { "epoch": 0.93, "grad_norm": 2.705362907402246, "learning_rate": 2.611481284179407e-07, "loss": 0.7836, "step": 6071 }, { "epoch": 0.93, "grad_norm": 2.7305925750666393, "learning_rate": 2.6002376026255883e-07, "loss": 0.7874, "step": 6072 }, { "epoch": 0.93, "grad_norm": 2.6614969532801633, "learning_rate": 2.5890178592016925e-07, "loss": 0.8704, "step": 6073 }, { "epoch": 0.93, "grad_norm": 2.4483769604343886, "learning_rate": 2.5778220566652025e-07, "loss": 0.686, "step": 6074 }, { "epoch": 0.93, "grad_norm": 2.4934663469292087, "learning_rate": 2.5666501977677614e-07, "loss": 0.721, "step": 6075 }, { "epoch": 0.93, "grad_norm": 2.459218609307165, "learning_rate": 2.5555022852550736e-07, "loss": 0.6552, "step": 6076 }, { "epoch": 0.93, "grad_norm": 2.8604168333650875, "learning_rate": 2.5443783218669804e-07, "loss": 0.6455, "step": 6077 }, { "epoch": 0.93, "grad_norm": 2.5098450736455997, "learning_rate": 2.5332783103374725e-07, "loss": 0.6351, "step": 6078 }, { "epoch": 0.93, "grad_norm": 2.841494576735548, "learning_rate": 2.52220225339459e-07, "loss": 0.7896, "step": 6079 }, { "epoch": 0.93, "grad_norm": 2.692905809508813, "learning_rate": 2.511150153760522e-07, "loss": 0.7215, "step": 6080 }, { "epoch": 0.93, "grad_norm": 2.727313980169833, "learning_rate": 2.5001220141515736e-07, "loss": 0.7061, "step": 6081 }, { "epoch": 0.93, "grad_norm": 2.5469765476370787, "learning_rate": 2.489117837278143e-07, "loss": 0.8545, "step": 6082 }, { "epoch": 0.93, "grad_norm": 2.559331431246218, "learning_rate": 2.4781376258447564e-07, "loss": 0.754, "step": 6083 }, { "epoch": 0.93, "grad_norm": 2.722703605969057, "learning_rate": 2.4671813825500324e-07, "loss": 0.6871, "step": 6084 }, { "epoch": 0.93, "grad_norm": 2.596003743401093, "learning_rate": 2.456249110086717e-07, "loss": 0.6845, "step": 6085 }, { "epoch": 0.93, "grad_norm": 3.083109926834688, "learning_rate": 2.4453408111416497e-07, "loss": 0.7946, "step": 6086 }, { "epoch": 0.93, "grad_norm": 2.7663008724269544, "learning_rate": 2.4344564883957976e-07, "loss": 0.7694, "step": 6087 }, { "epoch": 0.93, "grad_norm": 2.8746332918626205, "learning_rate": 2.4235961445241987e-07, "loss": 0.7514, "step": 6088 }, { "epoch": 0.93, "grad_norm": 2.6013942389612517, "learning_rate": 2.41275978219605e-07, "loss": 0.6923, "step": 6089 }, { "epoch": 0.93, "grad_norm": 2.912273621633104, "learning_rate": 2.4019474040746004e-07, "loss": 0.8051, "step": 6090 }, { "epoch": 0.93, "grad_norm": 2.7464518435612963, "learning_rate": 2.391159012817246e-07, "loss": 0.7467, "step": 6091 }, { "epoch": 0.93, "grad_norm": 2.7813044398351425, "learning_rate": 2.3803946110754649e-07, "loss": 0.7753, "step": 6092 }, { "epoch": 0.93, "grad_norm": 2.518752845522808, "learning_rate": 2.369654201494842e-07, "loss": 0.7387, "step": 6093 }, { "epoch": 0.93, "grad_norm": 2.725181935975217, "learning_rate": 2.3589377867150543e-07, "loss": 0.8623, "step": 6094 }, { "epoch": 0.93, "grad_norm": 3.1544796740668186, "learning_rate": 2.3482453693699282e-07, "loss": 0.7932, "step": 6095 }, { "epoch": 0.93, "grad_norm": 2.4788389266335646, "learning_rate": 2.3375769520873393e-07, "loss": 0.7703, "step": 6096 }, { "epoch": 0.93, "grad_norm": 2.842756149992874, "learning_rate": 2.3269325374892903e-07, "loss": 0.6308, "step": 6097 }, { "epoch": 0.93, "grad_norm": 2.5921408232583545, "learning_rate": 2.3163121281918888e-07, "loss": 0.7522, "step": 6098 }, { "epoch": 0.93, "grad_norm": 2.633161071717052, "learning_rate": 2.3057157268053133e-07, "loss": 0.6943, "step": 6099 }, { "epoch": 0.93, "grad_norm": 2.552057063916039, "learning_rate": 2.2951433359338805e-07, "loss": 0.7337, "step": 6100 }, { "epoch": 0.93, "grad_norm": 2.520467762762636, "learning_rate": 2.2845949581759785e-07, "loss": 0.7381, "step": 6101 }, { "epoch": 0.93, "grad_norm": 2.5453403536853623, "learning_rate": 2.2740705961241006e-07, "loss": 0.7681, "step": 6102 }, { "epoch": 0.93, "grad_norm": 3.0029521200394957, "learning_rate": 2.2635702523648552e-07, "loss": 0.6868, "step": 6103 }, { "epoch": 0.93, "grad_norm": 2.8302795535117573, "learning_rate": 2.253093929478911e-07, "loss": 0.7434, "step": 6104 }, { "epoch": 0.93, "grad_norm": 2.4786546039372634, "learning_rate": 2.2426416300410537e-07, "loss": 0.7557, "step": 6105 }, { "epoch": 0.93, "grad_norm": 2.48657198357177, "learning_rate": 2.2322133566201941e-07, "loss": 0.7145, "step": 6106 }, { "epoch": 0.93, "grad_norm": 2.6497695414229825, "learning_rate": 2.2218091117792718e-07, "loss": 0.668, "step": 6107 }, { "epoch": 0.93, "grad_norm": 2.6722487924038067, "learning_rate": 2.2114288980753962e-07, "loss": 0.7089, "step": 6108 }, { "epoch": 0.94, "grad_norm": 2.714821583484799, "learning_rate": 2.2010727180597157e-07, "loss": 0.7621, "step": 6109 }, { "epoch": 0.94, "grad_norm": 2.8942861918137464, "learning_rate": 2.1907405742774723e-07, "loss": 0.7278, "step": 6110 }, { "epoch": 0.94, "grad_norm": 3.280632946907079, "learning_rate": 2.1804324692680345e-07, "loss": 0.7916, "step": 6111 }, { "epoch": 0.94, "grad_norm": 2.5931959273453886, "learning_rate": 2.1701484055648536e-07, "loss": 0.7815, "step": 6112 }, { "epoch": 0.94, "grad_norm": 2.7709121512981065, "learning_rate": 2.1598883856954523e-07, "loss": 0.7502, "step": 6113 }, { "epoch": 0.94, "grad_norm": 2.9441111783976277, "learning_rate": 2.1496524121814576e-07, "loss": 0.8539, "step": 6114 }, { "epoch": 0.94, "grad_norm": 2.6898849255122323, "learning_rate": 2.13944048753858e-07, "loss": 0.7489, "step": 6115 }, { "epoch": 0.94, "grad_norm": 2.9925758163754987, "learning_rate": 2.1292526142766223e-07, "loss": 0.7994, "step": 6116 }, { "epoch": 0.94, "grad_norm": 2.6208054097627214, "learning_rate": 2.1190887948994822e-07, "loss": 0.6947, "step": 6117 }, { "epoch": 0.94, "grad_norm": 2.8860097331924224, "learning_rate": 2.1089490319051497e-07, "loss": 0.7517, "step": 6118 }, { "epoch": 0.94, "grad_norm": 2.49078276280568, "learning_rate": 2.0988333277856877e-07, "loss": 0.6892, "step": 6119 }, { "epoch": 0.94, "grad_norm": 2.7127390041528097, "learning_rate": 2.0887416850272514e-07, "loss": 0.8339, "step": 6120 }, { "epoch": 0.94, "grad_norm": 2.7664440908223717, "learning_rate": 2.078674106110079e-07, "loss": 0.788, "step": 6121 }, { "epoch": 0.94, "grad_norm": 2.7691557943619647, "learning_rate": 2.068630593508514e-07, "loss": 0.8135, "step": 6122 }, { "epoch": 0.94, "grad_norm": 2.519939135167319, "learning_rate": 2.05861114969097e-07, "loss": 0.7414, "step": 6123 }, { "epoch": 0.94, "grad_norm": 2.875431563743744, "learning_rate": 2.0486157771199223e-07, "loss": 0.7245, "step": 6124 }, { "epoch": 0.94, "grad_norm": 2.6094402261215643, "learning_rate": 2.0386444782519722e-07, "loss": 0.7991, "step": 6125 }, { "epoch": 0.94, "grad_norm": 2.859394466977294, "learning_rate": 2.0286972555377704e-07, "loss": 0.748, "step": 6126 }, { "epoch": 0.94, "grad_norm": 2.9414054266730982, "learning_rate": 2.0187741114220837e-07, "loss": 0.7258, "step": 6127 }, { "epoch": 0.94, "grad_norm": 2.6658460640061454, "learning_rate": 2.0088750483437393e-07, "loss": 0.7896, "step": 6128 }, { "epoch": 0.94, "grad_norm": 2.478030327430569, "learning_rate": 1.9990000687356348e-07, "loss": 0.7599, "step": 6129 }, { "epoch": 0.94, "grad_norm": 2.772471185791872, "learning_rate": 1.9891491750247738e-07, "loss": 0.8028, "step": 6130 }, { "epoch": 0.94, "grad_norm": 2.6702866352287553, "learning_rate": 1.9793223696322306e-07, "loss": 0.6865, "step": 6131 }, { "epoch": 0.94, "grad_norm": 2.640238395629171, "learning_rate": 1.969519654973151e-07, "loss": 0.6392, "step": 6132 }, { "epoch": 0.94, "grad_norm": 2.5352033004938774, "learning_rate": 1.9597410334567746e-07, "loss": 0.7221, "step": 6133 }, { "epoch": 0.94, "grad_norm": 2.7111068335551245, "learning_rate": 1.9499865074864122e-07, "loss": 0.7778, "step": 6134 }, { "epoch": 0.94, "grad_norm": 2.7186845902814403, "learning_rate": 1.9402560794594572e-07, "loss": 0.7479, "step": 6135 }, { "epoch": 0.94, "grad_norm": 2.6369963760573523, "learning_rate": 1.9305497517673633e-07, "loss": 0.7727, "step": 6136 }, { "epoch": 0.94, "grad_norm": 2.550199010640346, "learning_rate": 1.9208675267956666e-07, "loss": 0.736, "step": 6137 }, { "epoch": 0.94, "grad_norm": 2.9158190657987237, "learning_rate": 1.9112094069240084e-07, "loss": 0.7635, "step": 6138 }, { "epoch": 0.94, "grad_norm": 2.894596711016241, "learning_rate": 1.9015753945260784e-07, "loss": 0.8388, "step": 6139 }, { "epoch": 0.94, "grad_norm": 2.6370639749811517, "learning_rate": 1.8919654919696383e-07, "loss": 0.7775, "step": 6140 }, { "epoch": 0.94, "grad_norm": 2.9681643766004524, "learning_rate": 1.8823797016165435e-07, "loss": 0.7151, "step": 6141 }, { "epoch": 0.94, "grad_norm": 2.7649931358599034, "learning_rate": 1.872818025822709e-07, "loss": 0.7196, "step": 6142 }, { "epoch": 0.94, "grad_norm": 2.6534712725876854, "learning_rate": 1.863280466938111e-07, "loss": 0.7542, "step": 6143 }, { "epoch": 0.94, "grad_norm": 2.868580015988256, "learning_rate": 1.8537670273068298e-07, "loss": 0.7451, "step": 6144 }, { "epoch": 0.94, "grad_norm": 2.6025947830881266, "learning_rate": 1.844277709267006e-07, "loss": 0.7171, "step": 6145 }, { "epoch": 0.94, "grad_norm": 3.0658603765662704, "learning_rate": 1.834812515150841e-07, "loss": 0.7431, "step": 6146 }, { "epoch": 0.94, "grad_norm": 2.603293519933533, "learning_rate": 1.8253714472846184e-07, "loss": 0.7527, "step": 6147 }, { "epoch": 0.94, "grad_norm": 2.71605864580305, "learning_rate": 1.81595450798866e-07, "loss": 0.7696, "step": 6148 }, { "epoch": 0.94, "grad_norm": 2.537355350931297, "learning_rate": 1.806561699577436e-07, "loss": 0.6957, "step": 6149 }, { "epoch": 0.94, "grad_norm": 3.0873686115208168, "learning_rate": 1.7971930243593893e-07, "loss": 0.7041, "step": 6150 }, { "epoch": 0.94, "grad_norm": 2.8757638527449303, "learning_rate": 1.7878484846371001e-07, "loss": 0.7434, "step": 6151 }, { "epoch": 0.94, "grad_norm": 2.6085872075147596, "learning_rate": 1.7785280827071982e-07, "loss": 0.6898, "step": 6152 }, { "epoch": 0.94, "grad_norm": 2.5746517753489577, "learning_rate": 1.769231820860362e-07, "loss": 0.74, "step": 6153 }, { "epoch": 0.94, "grad_norm": 2.667581760238676, "learning_rate": 1.759959701381353e-07, "loss": 0.6863, "step": 6154 }, { "epoch": 0.94, "grad_norm": 2.799957147402608, "learning_rate": 1.7507117265490148e-07, "loss": 0.6544, "step": 6155 }, { "epoch": 0.94, "grad_norm": 2.6982037422852576, "learning_rate": 1.7414878986362294e-07, "loss": 0.6762, "step": 6156 }, { "epoch": 0.94, "grad_norm": 2.976137628722369, "learning_rate": 1.732288219909961e-07, "loss": 0.8104, "step": 6157 }, { "epoch": 0.94, "grad_norm": 2.559301938412268, "learning_rate": 1.7231126926312235e-07, "loss": 0.6893, "step": 6158 }, { "epoch": 0.94, "grad_norm": 2.9686201652518025, "learning_rate": 1.7139613190550906e-07, "loss": 0.7745, "step": 6159 }, { "epoch": 0.94, "grad_norm": 2.741683295872832, "learning_rate": 1.7048341014307523e-07, "loss": 0.6785, "step": 6160 }, { "epoch": 0.94, "grad_norm": 2.8906234050957478, "learning_rate": 1.6957310420013918e-07, "loss": 0.734, "step": 6161 }, { "epoch": 0.94, "grad_norm": 2.6247075942288927, "learning_rate": 1.686652143004286e-07, "loss": 0.6899, "step": 6162 }, { "epoch": 0.94, "grad_norm": 2.558633377476938, "learning_rate": 1.6775974066707833e-07, "loss": 0.7669, "step": 6163 }, { "epoch": 0.94, "grad_norm": 2.6371825253937002, "learning_rate": 1.668566835226293e-07, "loss": 0.6652, "step": 6164 }, { "epoch": 0.94, "grad_norm": 2.7199806217099205, "learning_rate": 1.6595604308902613e-07, "loss": 0.8627, "step": 6165 }, { "epoch": 0.94, "grad_norm": 2.6341507876042325, "learning_rate": 1.650578195876218e-07, "loss": 0.7276, "step": 6166 }, { "epoch": 0.94, "grad_norm": 3.1415513717540815, "learning_rate": 1.6416201323917413e-07, "loss": 0.7839, "step": 6167 }, { "epoch": 0.94, "grad_norm": 2.4806645277468076, "learning_rate": 1.6326862426384705e-07, "loss": 0.7079, "step": 6168 }, { "epoch": 0.94, "grad_norm": 2.567681957131878, "learning_rate": 1.6237765288121044e-07, "loss": 0.7302, "step": 6169 }, { "epoch": 0.94, "grad_norm": 2.749034703344554, "learning_rate": 1.6148909931024026e-07, "loss": 0.7658, "step": 6170 }, { "epoch": 0.94, "grad_norm": 2.7358323500883133, "learning_rate": 1.606029637693185e-07, "loss": 0.7315, "step": 6171 }, { "epoch": 0.94, "grad_norm": 2.783968035090616, "learning_rate": 1.5971924647623204e-07, "loss": 0.8276, "step": 6172 }, { "epoch": 0.94, "grad_norm": 2.8293213016993257, "learning_rate": 1.5883794764817272e-07, "loss": 0.7387, "step": 6173 }, { "epoch": 0.95, "grad_norm": 2.8949693147568425, "learning_rate": 1.5795906750174062e-07, "loss": 0.814, "step": 6174 }, { "epoch": 0.95, "grad_norm": 3.2324609639018966, "learning_rate": 1.570826062529407e-07, "loss": 0.7689, "step": 6175 }, { "epoch": 0.95, "grad_norm": 2.5822059542555635, "learning_rate": 1.5620856411717954e-07, "loss": 0.6804, "step": 6176 }, { "epoch": 0.95, "grad_norm": 2.777882167249254, "learning_rate": 1.5533694130927424e-07, "loss": 0.7848, "step": 6177 }, { "epoch": 0.95, "grad_norm": 3.1167534290212546, "learning_rate": 1.544677380434445e-07, "loss": 0.6931, "step": 6178 }, { "epoch": 0.95, "grad_norm": 2.8203501178896424, "learning_rate": 1.5360095453331724e-07, "loss": 0.6658, "step": 6179 }, { "epoch": 0.95, "grad_norm": 2.5251340746824225, "learning_rate": 1.5273659099192317e-07, "loss": 0.6895, "step": 6180 }, { "epoch": 0.95, "grad_norm": 2.599439628395259, "learning_rate": 1.5187464763169678e-07, "loss": 0.8254, "step": 6181 }, { "epoch": 0.95, "grad_norm": 2.804159345742806, "learning_rate": 1.5101512466448197e-07, "loss": 0.7402, "step": 6182 }, { "epoch": 0.95, "grad_norm": 2.6898781919312302, "learning_rate": 1.501580223015242e-07, "loss": 0.7552, "step": 6183 }, { "epoch": 0.95, "grad_norm": 2.7033042374066762, "learning_rate": 1.4930334075347497e-07, "loss": 0.7328, "step": 6184 }, { "epoch": 0.95, "grad_norm": 3.473450686295006, "learning_rate": 1.4845108023039178e-07, "loss": 0.8645, "step": 6185 }, { "epoch": 0.95, "grad_norm": 2.556613318569326, "learning_rate": 1.47601240941736e-07, "loss": 0.7423, "step": 6186 }, { "epoch": 0.95, "grad_norm": 2.6293302467512274, "learning_rate": 1.4675382309637386e-07, "loss": 0.6899, "step": 6187 }, { "epoch": 0.95, "grad_norm": 2.9116476239785314, "learning_rate": 1.4590882690257768e-07, "loss": 0.7177, "step": 6188 }, { "epoch": 0.95, "grad_norm": 2.6550641452752664, "learning_rate": 1.4506625256802355e-07, "loss": 0.6495, "step": 6189 }, { "epoch": 0.95, "grad_norm": 2.6021279534255637, "learning_rate": 1.4422610029979244e-07, "loss": 0.7871, "step": 6190 }, { "epoch": 0.95, "grad_norm": 2.76386657740609, "learning_rate": 1.4338837030437147e-07, "loss": 0.7694, "step": 6191 }, { "epoch": 0.95, "grad_norm": 2.7179569750741943, "learning_rate": 1.4255306278764813e-07, "loss": 0.7451, "step": 6192 }, { "epoch": 0.95, "grad_norm": 2.7081378794105344, "learning_rate": 1.4172017795492044e-07, "loss": 0.8077, "step": 6193 }, { "epoch": 0.95, "grad_norm": 3.263696185374978, "learning_rate": 1.4088971601088796e-07, "loss": 0.8088, "step": 6194 }, { "epoch": 0.95, "grad_norm": 2.7987236616266005, "learning_rate": 1.400616771596519e-07, "loss": 0.7513, "step": 6195 }, { "epoch": 0.95, "grad_norm": 2.7879734412104082, "learning_rate": 1.3923606160472504e-07, "loss": 0.7186, "step": 6196 }, { "epoch": 0.95, "grad_norm": 2.675492212746545, "learning_rate": 1.3841286954901834e-07, "loss": 0.7624, "step": 6197 }, { "epoch": 0.95, "grad_norm": 2.5732871399578583, "learning_rate": 1.3759210119485e-07, "loss": 0.7256, "step": 6198 }, { "epoch": 0.95, "grad_norm": 2.719745063329388, "learning_rate": 1.3677375674394088e-07, "loss": 0.8057, "step": 6199 }, { "epoch": 0.95, "grad_norm": 2.8279350287112672, "learning_rate": 1.359578363974179e-07, "loss": 0.7716, "step": 6200 }, { "epoch": 0.95, "grad_norm": 2.6119360982876247, "learning_rate": 1.351443403558117e-07, "loss": 0.7593, "step": 6201 }, { "epoch": 0.95, "grad_norm": 2.548879171886614, "learning_rate": 1.3433326881905683e-07, "loss": 0.6883, "step": 6202 }, { "epoch": 0.95, "grad_norm": 2.768212090009197, "learning_rate": 1.3352462198649163e-07, "loss": 0.8027, "step": 6203 }, { "epoch": 0.95, "grad_norm": 2.6002595913786246, "learning_rate": 1.327184000568582e-07, "loss": 0.7569, "step": 6204 }, { "epoch": 0.95, "grad_norm": 2.8178858099550923, "learning_rate": 1.3191460322830364e-07, "loss": 0.7394, "step": 6205 }, { "epoch": 0.95, "grad_norm": 2.758556782006995, "learning_rate": 1.3111323169837875e-07, "loss": 0.855, "step": 6206 }, { "epoch": 0.95, "grad_norm": 2.8694004245810385, "learning_rate": 1.3031428566403824e-07, "loss": 0.7254, "step": 6207 }, { "epoch": 0.95, "grad_norm": 2.875488729219647, "learning_rate": 1.2951776532164062e-07, "loss": 0.7762, "step": 6208 }, { "epoch": 0.95, "grad_norm": 2.542501426417321, "learning_rate": 1.2872367086694704e-07, "loss": 0.6791, "step": 6209 }, { "epoch": 0.95, "grad_norm": 2.637201824519699, "learning_rate": 1.2793200249512693e-07, "loss": 0.7678, "step": 6210 }, { "epoch": 0.95, "grad_norm": 2.6419157596436613, "learning_rate": 1.271427604007458e-07, "loss": 0.7553, "step": 6211 }, { "epoch": 0.95, "grad_norm": 2.639956132902984, "learning_rate": 1.263559447777818e-07, "loss": 0.7385, "step": 6212 }, { "epoch": 0.95, "grad_norm": 2.936008032052921, "learning_rate": 1.2557155581960913e-07, "loss": 0.745, "step": 6213 }, { "epoch": 0.95, "grad_norm": 2.565648666042382, "learning_rate": 1.247895937190091e-07, "loss": 0.7251, "step": 6214 }, { "epoch": 0.95, "grad_norm": 2.6059344542245975, "learning_rate": 1.2401005866816586e-07, "loss": 0.7442, "step": 6215 }, { "epoch": 0.95, "grad_norm": 2.5199423385190025, "learning_rate": 1.2323295085866827e-07, "loss": 0.7081, "step": 6216 }, { "epoch": 0.95, "grad_norm": 2.766694260160205, "learning_rate": 1.2245827048150694e-07, "loss": 0.8202, "step": 6217 }, { "epoch": 0.95, "grad_norm": 2.7075914640665246, "learning_rate": 1.2168601772707622e-07, "loss": 0.7708, "step": 6218 }, { "epoch": 0.95, "grad_norm": 2.623914655169994, "learning_rate": 1.209161927851754e-07, "loss": 0.836, "step": 6219 }, { "epoch": 0.95, "grad_norm": 2.5301146209908945, "learning_rate": 1.2014879584500427e-07, "loss": 0.6935, "step": 6220 }, { "epoch": 0.95, "grad_norm": 2.884730413869238, "learning_rate": 1.193838270951686e-07, "loss": 0.7502, "step": 6221 }, { "epoch": 0.95, "grad_norm": 2.862630433813254, "learning_rate": 1.1862128672367579e-07, "loss": 0.7959, "step": 6222 }, { "epoch": 0.95, "grad_norm": 2.7660713652174667, "learning_rate": 1.1786117491793702e-07, "loss": 0.7949, "step": 6223 }, { "epoch": 0.95, "grad_norm": 2.8777554137925074, "learning_rate": 1.1710349186476732e-07, "loss": 0.7505, "step": 6224 }, { "epoch": 0.95, "grad_norm": 2.6031163077151893, "learning_rate": 1.1634823775038218e-07, "loss": 0.7089, "step": 6225 }, { "epoch": 0.95, "grad_norm": 3.2398709920206232, "learning_rate": 1.155954127604042e-07, "loss": 0.7993, "step": 6226 }, { "epoch": 0.95, "grad_norm": 2.7285117964587773, "learning_rate": 1.148450170798543e-07, "loss": 0.7544, "step": 6227 }, { "epoch": 0.95, "grad_norm": 3.0268938632735596, "learning_rate": 1.1409705089315826e-07, "loss": 0.6847, "step": 6228 }, { "epoch": 0.95, "grad_norm": 2.6551913985471, "learning_rate": 1.1335151438414905e-07, "loss": 0.7259, "step": 6229 }, { "epoch": 0.95, "grad_norm": 2.9149685796855356, "learning_rate": 1.1260840773605452e-07, "loss": 0.7453, "step": 6230 }, { "epoch": 0.95, "grad_norm": 2.519006685781597, "learning_rate": 1.1186773113151083e-07, "loss": 0.7421, "step": 6231 }, { "epoch": 0.95, "grad_norm": 2.5509960965895147, "learning_rate": 1.1112948475255681e-07, "loss": 0.7092, "step": 6232 }, { "epoch": 0.95, "grad_norm": 2.5465021594958617, "learning_rate": 1.1039366878063062e-07, "loss": 0.7868, "step": 6233 }, { "epoch": 0.95, "grad_norm": 2.7905289925773573, "learning_rate": 1.0966028339657763e-07, "loss": 0.7965, "step": 6234 }, { "epoch": 0.95, "grad_norm": 2.847803802086891, "learning_rate": 1.0892932878064144e-07, "loss": 0.8927, "step": 6235 }, { "epoch": 0.95, "grad_norm": 2.8952086928132865, "learning_rate": 1.0820080511247055e-07, "loss": 0.888, "step": 6236 }, { "epoch": 0.95, "grad_norm": 2.726833808841528, "learning_rate": 1.0747471257111619e-07, "loss": 0.756, "step": 6237 }, { "epoch": 0.95, "grad_norm": 2.6147257812951286, "learning_rate": 1.0675105133503116e-07, "loss": 0.6977, "step": 6238 }, { "epoch": 0.95, "grad_norm": 2.5060729867642375, "learning_rate": 1.0602982158206987e-07, "loss": 0.825, "step": 6239 }, { "epoch": 0.96, "grad_norm": 2.403041630415913, "learning_rate": 1.0531102348949273e-07, "loss": 0.6932, "step": 6240 }, { "epoch": 0.96, "grad_norm": 2.5488272121277284, "learning_rate": 1.0459465723395956e-07, "loss": 0.7599, "step": 6241 }, { "epoch": 0.96, "grad_norm": 2.6571821875824044, "learning_rate": 1.0388072299153174e-07, "loss": 0.7615, "step": 6242 }, { "epoch": 0.96, "grad_norm": 2.871469208438707, "learning_rate": 1.0316922093767556e-07, "loss": 0.7261, "step": 6243 }, { "epoch": 0.96, "grad_norm": 2.6783501614843437, "learning_rate": 1.0246015124725672e-07, "loss": 0.7352, "step": 6244 }, { "epoch": 0.96, "grad_norm": 2.862113879224904, "learning_rate": 1.0175351409454693e-07, "loss": 0.7957, "step": 6245 }, { "epoch": 0.96, "grad_norm": 2.5378602149474845, "learning_rate": 1.0104930965321724e-07, "loss": 0.7019, "step": 6246 }, { "epoch": 0.96, "grad_norm": 2.9359078320937506, "learning_rate": 1.0034753809634035e-07, "loss": 0.6046, "step": 6247 }, { "epoch": 0.96, "grad_norm": 3.2423297564849984, "learning_rate": 9.96481995963916e-08, "loss": 0.738, "step": 6248 }, { "epoch": 0.96, "grad_norm": 2.491630187666835, "learning_rate": 9.89512943252513e-08, "loss": 0.71, "step": 6249 }, { "epoch": 0.96, "grad_norm": 2.6773863279396015, "learning_rate": 9.825682245419576e-08, "loss": 0.7626, "step": 6250 }, { "epoch": 0.96, "grad_norm": 2.7660942543458504, "learning_rate": 9.756478415390847e-08, "loss": 0.7542, "step": 6251 }, { "epoch": 0.96, "grad_norm": 2.897450522419471, "learning_rate": 9.687517959447446e-08, "loss": 0.7878, "step": 6252 }, { "epoch": 0.96, "grad_norm": 2.499684952206905, "learning_rate": 9.618800894537594e-08, "loss": 0.7011, "step": 6253 }, { "epoch": 0.96, "grad_norm": 2.691932857931784, "learning_rate": 9.550327237550339e-08, "loss": 0.792, "step": 6254 }, { "epoch": 0.96, "grad_norm": 2.4931560935692882, "learning_rate": 9.482097005314328e-08, "loss": 0.8742, "step": 6255 }, { "epoch": 0.96, "grad_norm": 2.820129725912161, "learning_rate": 9.414110214598815e-08, "loss": 0.6588, "step": 6256 }, { "epoch": 0.96, "grad_norm": 2.7829327595354405, "learning_rate": 9.3463668821131e-08, "loss": 0.732, "step": 6257 }, { "epoch": 0.96, "grad_norm": 2.8675835655307327, "learning_rate": 9.278867024506421e-08, "loss": 0.7629, "step": 6258 }, { "epoch": 0.96, "grad_norm": 2.736723949619082, "learning_rate": 9.211610658368397e-08, "loss": 0.7268, "step": 6259 }, { "epoch": 0.96, "grad_norm": 2.5946668669950257, "learning_rate": 9.144597800228693e-08, "loss": 0.7606, "step": 6260 }, { "epoch": 0.96, "grad_norm": 2.7121437423369064, "learning_rate": 9.077828466557359e-08, "loss": 0.8796, "step": 6261 }, { "epoch": 0.96, "grad_norm": 2.449222452836164, "learning_rate": 9.011302673764266e-08, "loss": 0.7207, "step": 6262 }, { "epoch": 0.96, "grad_norm": 2.5211066598013563, "learning_rate": 8.94502043819967e-08, "loss": 0.7476, "step": 6263 }, { "epoch": 0.96, "grad_norm": 2.7837470093262406, "learning_rate": 8.878981776153872e-08, "loss": 0.7562, "step": 6264 }, { "epoch": 0.96, "grad_norm": 2.6387684488289946, "learning_rate": 8.813186703857334e-08, "loss": 0.73, "step": 6265 }, { "epoch": 0.96, "grad_norm": 2.514520139552492, "learning_rate": 8.747635237480679e-08, "loss": 0.7326, "step": 6266 }, { "epoch": 0.96, "grad_norm": 2.642189124321444, "learning_rate": 8.68232739313446e-08, "loss": 0.7199, "step": 6267 }, { "epoch": 0.96, "grad_norm": 2.658159002696627, "learning_rate": 8.617263186869728e-08, "loss": 0.8156, "step": 6268 }, { "epoch": 0.96, "grad_norm": 2.5569705911088567, "learning_rate": 8.552442634677361e-08, "loss": 0.7859, "step": 6269 }, { "epoch": 0.96, "grad_norm": 2.507594117809746, "learning_rate": 8.487865752488277e-08, "loss": 0.6458, "step": 6270 }, { "epoch": 0.96, "grad_norm": 2.7535773207528407, "learning_rate": 8.423532556173896e-08, "loss": 0.8386, "step": 6271 }, { "epoch": 0.96, "grad_norm": 2.4238202953270656, "learning_rate": 8.359443061545348e-08, "loss": 0.6733, "step": 6272 }, { "epoch": 0.96, "grad_norm": 2.6580601292273744, "learning_rate": 8.29559728435414e-08, "loss": 0.7644, "step": 6273 }, { "epoch": 0.96, "grad_norm": 2.5288605289633383, "learning_rate": 8.231995240291613e-08, "loss": 0.7586, "step": 6274 }, { "epoch": 0.96, "grad_norm": 2.7429246054717367, "learning_rate": 8.168636944989595e-08, "loss": 0.8357, "step": 6275 }, { "epoch": 0.96, "grad_norm": 2.836135023842705, "learning_rate": 8.105522414019629e-08, "loss": 0.7551, "step": 6276 }, { "epoch": 0.96, "grad_norm": 2.601433221659957, "learning_rate": 8.04265166289353e-08, "loss": 0.7847, "step": 6277 }, { "epoch": 0.96, "grad_norm": 2.8694538043296087, "learning_rate": 7.980024707063161e-08, "loss": 0.7775, "step": 6278 }, { "epoch": 0.96, "grad_norm": 2.6236106508897428, "learning_rate": 7.91764156192043e-08, "loss": 0.7561, "step": 6279 }, { "epoch": 0.96, "grad_norm": 2.7156377840729413, "learning_rate": 7.85550224279752e-08, "loss": 0.7812, "step": 6280 }, { "epoch": 0.96, "grad_norm": 2.8693681860985936, "learning_rate": 7.793606764966321e-08, "loss": 0.7878, "step": 6281 }, { "epoch": 0.96, "grad_norm": 2.859687806470752, "learning_rate": 7.731955143639225e-08, "loss": 0.7898, "step": 6282 }, { "epoch": 0.96, "grad_norm": 2.753093997291165, "learning_rate": 7.67054739396822e-08, "loss": 0.7418, "step": 6283 }, { "epoch": 0.96, "grad_norm": 2.697051038845374, "learning_rate": 7.609383531045788e-08, "loss": 0.7606, "step": 6284 }, { "epoch": 0.96, "grad_norm": 2.6081068715682965, "learning_rate": 7.548463569904241e-08, "loss": 0.7459, "step": 6285 }, { "epoch": 0.96, "grad_norm": 5.493989378746843, "learning_rate": 7.487787525516044e-08, "loss": 0.8218, "step": 6286 }, { "epoch": 0.96, "grad_norm": 3.1768599720968322, "learning_rate": 7.427355412793602e-08, "loss": 0.7762, "step": 6287 }, { "epoch": 0.96, "grad_norm": 2.8093572987998923, "learning_rate": 7.367167246589479e-08, "loss": 0.7584, "step": 6288 }, { "epoch": 0.96, "grad_norm": 2.5319574525103574, "learning_rate": 7.307223041696177e-08, "loss": 0.7272, "step": 6289 }, { "epoch": 0.96, "grad_norm": 2.7085439960965894, "learning_rate": 7.247522812846353e-08, "loss": 0.7297, "step": 6290 }, { "epoch": 0.96, "grad_norm": 2.656018882388865, "learning_rate": 7.188066574712604e-08, "loss": 0.743, "step": 6291 }, { "epoch": 0.96, "grad_norm": 2.854981479330724, "learning_rate": 7.128854341907688e-08, "loss": 0.8181, "step": 6292 }, { "epoch": 0.96, "grad_norm": 2.7236793623907425, "learning_rate": 7.069886128984294e-08, "loss": 0.7819, "step": 6293 }, { "epoch": 0.96, "grad_norm": 2.592995981693292, "learning_rate": 7.011161950435053e-08, "loss": 0.7192, "step": 6294 }, { "epoch": 0.96, "grad_norm": 2.73348258434267, "learning_rate": 6.952681820692753e-08, "loss": 0.7506, "step": 6295 }, { "epoch": 0.96, "grad_norm": 2.794527286958416, "learning_rate": 6.89444575413023e-08, "loss": 0.7659, "step": 6296 }, { "epoch": 0.96, "grad_norm": 2.6722789816838124, "learning_rate": 6.836453765060258e-08, "loss": 0.7245, "step": 6297 }, { "epoch": 0.96, "grad_norm": 2.6796626725806423, "learning_rate": 6.778705867735657e-08, "loss": 0.7804, "step": 6298 }, { "epoch": 0.96, "grad_norm": 2.795808761513518, "learning_rate": 6.721202076349187e-08, "loss": 0.7924, "step": 6299 }, { "epoch": 0.96, "grad_norm": 2.578487552690196, "learning_rate": 6.663942405033763e-08, "loss": 0.7203, "step": 6300 }, { "epoch": 0.96, "grad_norm": 2.912508034176297, "learning_rate": 6.606926867862129e-08, "loss": 0.62, "step": 6301 }, { "epoch": 0.96, "grad_norm": 2.5523100748647063, "learning_rate": 6.550155478847075e-08, "loss": 0.7968, "step": 6302 }, { "epoch": 0.96, "grad_norm": 2.8976628589496327, "learning_rate": 6.493628251941442e-08, "loss": 0.7717, "step": 6303 }, { "epoch": 0.96, "grad_norm": 2.4732197073116837, "learning_rate": 6.437345201038115e-08, "loss": 0.7558, "step": 6304 }, { "epoch": 0.97, "grad_norm": 2.6265852771064067, "learning_rate": 6.381306339969806e-08, "loss": 0.6971, "step": 6305 }, { "epoch": 0.97, "grad_norm": 2.573298694817957, "learning_rate": 6.325511682509277e-08, "loss": 0.7839, "step": 6306 }, { "epoch": 0.97, "grad_norm": 3.0305915808088173, "learning_rate": 6.269961242369338e-08, "loss": 0.8317, "step": 6307 }, { "epoch": 0.97, "grad_norm": 2.924612836068098, "learning_rate": 6.214655033202732e-08, "loss": 0.7161, "step": 6308 }, { "epoch": 0.97, "grad_norm": 2.6344706369779183, "learning_rate": 6.159593068602255e-08, "loss": 0.6954, "step": 6309 }, { "epoch": 0.97, "grad_norm": 2.4740253678071262, "learning_rate": 6.104775362100301e-08, "loss": 0.6835, "step": 6310 }, { "epoch": 0.97, "grad_norm": 2.807355565461176, "learning_rate": 6.050201927169875e-08, "loss": 0.699, "step": 6311 }, { "epoch": 0.97, "grad_norm": 2.689978770639496, "learning_rate": 5.995872777223466e-08, "loss": 0.8269, "step": 6312 }, { "epoch": 0.97, "grad_norm": 2.728285657498286, "learning_rate": 5.941787925613507e-08, "loss": 0.748, "step": 6313 }, { "epoch": 0.97, "grad_norm": 2.6673535571875577, "learning_rate": 5.8879473856328084e-08, "loss": 0.8318, "step": 6314 }, { "epoch": 0.97, "grad_norm": 2.858044430958844, "learning_rate": 5.8343511705136746e-08, "loss": 0.755, "step": 6315 }, { "epoch": 0.97, "grad_norm": 2.6411821646605738, "learning_rate": 5.780999293428569e-08, "loss": 0.7643, "step": 6316 }, { "epoch": 0.97, "grad_norm": 2.8070989561640527, "learning_rate": 5.727891767490001e-08, "loss": 0.8535, "step": 6317 }, { "epoch": 0.97, "grad_norm": 2.5051496078033573, "learning_rate": 5.675028605750199e-08, "loss": 0.6572, "step": 6318 }, { "epoch": 0.97, "grad_norm": 2.635854501865161, "learning_rate": 5.622409821201436e-08, "loss": 0.7762, "step": 6319 }, { "epoch": 0.97, "grad_norm": 2.9390171766065847, "learning_rate": 5.5700354267760326e-08, "loss": 0.7604, "step": 6320 }, { "epoch": 0.97, "grad_norm": 2.9946880040924264, "learning_rate": 5.517905435345916e-08, "loss": 0.7079, "step": 6321 }, { "epoch": 0.97, "grad_norm": 2.561259049882208, "learning_rate": 5.4660198597235035e-08, "loss": 0.795, "step": 6322 }, { "epoch": 0.97, "grad_norm": 2.6513264646111168, "learning_rate": 5.414378712660706e-08, "loss": 0.6741, "step": 6323 }, { "epoch": 0.97, "grad_norm": 2.741628132065017, "learning_rate": 5.36298200684926e-08, "loss": 0.8026, "step": 6324 }, { "epoch": 0.97, "grad_norm": 2.7135972268576647, "learning_rate": 5.3118297549212826e-08, "loss": 0.7927, "step": 6325 }, { "epoch": 0.97, "grad_norm": 2.848539842298522, "learning_rate": 5.260921969448496e-08, "loss": 0.8676, "step": 6326 }, { "epoch": 0.97, "grad_norm": 2.789041773355471, "learning_rate": 5.210258662942669e-08, "loss": 0.815, "step": 6327 }, { "epoch": 0.97, "grad_norm": 2.5460210484795236, "learning_rate": 5.159839847855175e-08, "loss": 0.7364, "step": 6328 }, { "epoch": 0.97, "grad_norm": 2.7098943104095854, "learning_rate": 5.109665536577768e-08, "loss": 0.6325, "step": 6329 }, { "epoch": 0.97, "grad_norm": 2.855953216059735, "learning_rate": 5.059735741441807e-08, "loss": 0.6845, "step": 6330 }, { "epoch": 0.97, "grad_norm": 2.7627575881511905, "learning_rate": 5.0100504747186974e-08, "loss": 0.751, "step": 6331 }, { "epoch": 0.97, "grad_norm": 2.6643373656366363, "learning_rate": 4.9606097486195604e-08, "loss": 0.7861, "step": 6332 }, { "epoch": 0.97, "grad_norm": 2.840794267648225, "learning_rate": 4.911413575295787e-08, "loss": 0.7249, "step": 6333 }, { "epoch": 0.97, "grad_norm": 3.635273402121381, "learning_rate": 4.8624619668381504e-08, "loss": 0.6318, "step": 6334 }, { "epoch": 0.97, "grad_norm": 2.7977870462313095, "learning_rate": 4.813754935277581e-08, "loss": 0.8487, "step": 6335 }, { "epoch": 0.97, "grad_norm": 2.6463144854142757, "learning_rate": 4.765292492585172e-08, "loss": 0.6637, "step": 6336 }, { "epoch": 0.97, "grad_norm": 3.065956639630013, "learning_rate": 4.717074650671394e-08, "loss": 0.8322, "step": 6337 }, { "epoch": 0.97, "grad_norm": 2.7850962673987762, "learning_rate": 4.6691014213868794e-08, "loss": 0.7225, "step": 6338 }, { "epoch": 0.97, "grad_norm": 2.9330373089277932, "learning_rate": 4.621372816522196e-08, "loss": 0.8766, "step": 6339 }, { "epoch": 0.97, "grad_norm": 2.653237142876649, "learning_rate": 4.573888847807517e-08, "loss": 0.7426, "step": 6340 }, { "epoch": 0.97, "grad_norm": 2.4358443741832705, "learning_rate": 4.5266495269132846e-08, "loss": 0.7164, "step": 6341 }, { "epoch": 0.97, "grad_norm": 2.392735653487316, "learning_rate": 4.479654865449545e-08, "loss": 0.6767, "step": 6342 }, { "epoch": 0.97, "grad_norm": 2.972324375045306, "learning_rate": 4.432904874966171e-08, "loss": 0.8116, "step": 6343 }, { "epoch": 0.97, "grad_norm": 2.380025455047499, "learning_rate": 4.386399566953081e-08, "loss": 0.6901, "step": 6344 }, { "epoch": 0.97, "grad_norm": 2.771678928999211, "learning_rate": 4.340138952839912e-08, "loss": 0.7367, "step": 6345 }, { "epoch": 0.97, "grad_norm": 2.557862003826193, "learning_rate": 4.294123043996235e-08, "loss": 0.777, "step": 6346 }, { "epoch": 0.97, "grad_norm": 2.840869855641693, "learning_rate": 4.248351851731558e-08, "loss": 0.7093, "step": 6347 }, { "epoch": 0.97, "grad_norm": 2.7641169505849392, "learning_rate": 4.2028253872949954e-08, "loss": 0.813, "step": 6348 }, { "epoch": 0.97, "grad_norm": 2.7348826985320174, "learning_rate": 4.157543661875929e-08, "loss": 0.7661, "step": 6349 }, { "epoch": 0.97, "grad_norm": 2.7141646574375673, "learning_rate": 4.112506686603013e-08, "loss": 0.7166, "step": 6350 }, { "epoch": 0.97, "grad_norm": 2.433956966142259, "learning_rate": 4.067714472545281e-08, "loss": 0.6179, "step": 6351 }, { "epoch": 0.97, "grad_norm": 2.5873762825931, "learning_rate": 4.023167030711484e-08, "loss": 0.7205, "step": 6352 }, { "epoch": 0.97, "grad_norm": 2.5709124602943914, "learning_rate": 3.978864372049973e-08, "loss": 0.7412, "step": 6353 }, { "epoch": 0.97, "grad_norm": 2.894106131249421, "learning_rate": 3.934806507449041e-08, "loss": 0.7344, "step": 6354 }, { "epoch": 0.97, "grad_norm": 2.649700374213911, "learning_rate": 3.8909934477370234e-08, "loss": 0.7375, "step": 6355 }, { "epoch": 0.97, "grad_norm": 2.7352121319525016, "learning_rate": 3.847425203681865e-08, "loss": 0.6973, "step": 6356 }, { "epoch": 0.97, "grad_norm": 2.546676278554904, "learning_rate": 3.804101785991443e-08, "loss": 0.8082, "step": 6357 }, { "epoch": 0.97, "grad_norm": 2.731368752423272, "learning_rate": 3.7610232053135745e-08, "loss": 0.7807, "step": 6358 }, { "epoch": 0.97, "grad_norm": 2.600089510254957, "learning_rate": 3.7181894722355674e-08, "loss": 0.7551, "step": 6359 }, { "epoch": 0.97, "grad_norm": 2.710146340660481, "learning_rate": 3.675600597284779e-08, "loss": 0.7711, "step": 6360 }, { "epoch": 0.97, "grad_norm": 2.728594166988212, "learning_rate": 3.6332565909283915e-08, "loss": 0.7805, "step": 6361 }, { "epoch": 0.97, "grad_norm": 3.1455008400453077, "learning_rate": 3.591157463573303e-08, "loss": 0.7706, "step": 6362 }, { "epoch": 0.97, "grad_norm": 2.7493337696123508, "learning_rate": 3.5493032255665694e-08, "loss": 0.7219, "step": 6363 }, { "epoch": 0.97, "grad_norm": 2.582480537155428, "learning_rate": 3.507693887194408e-08, "loss": 0.7119, "step": 6364 }, { "epoch": 0.97, "grad_norm": 2.9089662705341968, "learning_rate": 3.4663294586835264e-08, "loss": 0.7601, "step": 6365 }, { "epoch": 0.97, "grad_norm": 2.579269195629373, "learning_rate": 3.425209950200015e-08, "loss": 0.719, "step": 6366 }, { "epoch": 0.97, "grad_norm": 2.9961981717863577, "learning_rate": 3.3843353718499004e-08, "loss": 0.7513, "step": 6367 }, { "epoch": 0.97, "grad_norm": 2.7274870617059004, "learning_rate": 3.343705733679037e-08, "loss": 0.7911, "step": 6368 }, { "epoch": 0.97, "grad_norm": 2.6028781172920112, "learning_rate": 3.3033210456729915e-08, "loss": 0.8072, "step": 6369 }, { "epoch": 0.98, "grad_norm": 2.5206585417321614, "learning_rate": 3.263181317757269e-08, "loss": 0.7499, "step": 6370 }, { "epoch": 0.98, "grad_norm": 2.89486420226331, "learning_rate": 3.2232865597969786e-08, "loss": 0.763, "step": 6371 }, { "epoch": 0.98, "grad_norm": 2.4263650727508166, "learning_rate": 3.183636781597277e-08, "loss": 0.7045, "step": 6372 }, { "epoch": 0.98, "grad_norm": 2.688965635402593, "learning_rate": 3.144231992902813e-08, "loss": 0.8026, "step": 6373 }, { "epoch": 0.98, "grad_norm": 2.9726884382447123, "learning_rate": 3.105072203398285e-08, "loss": 0.8126, "step": 6374 }, { "epoch": 0.98, "grad_norm": 2.5089298805790174, "learning_rate": 3.0661574227081046e-08, "loss": 0.82, "step": 6375 }, { "epoch": 0.98, "grad_norm": 2.6440762681304477, "learning_rate": 3.0274876603962885e-08, "loss": 0.7883, "step": 6376 }, { "epoch": 0.98, "grad_norm": 2.514536365196918, "learning_rate": 2.989062925966901e-08, "loss": 0.7623, "step": 6377 }, { "epoch": 0.98, "grad_norm": 2.4245283656582233, "learning_rate": 2.95088322886361e-08, "loss": 0.7147, "step": 6378 }, { "epoch": 0.98, "grad_norm": 2.896910981296976, "learning_rate": 2.9129485784699096e-08, "loss": 0.8048, "step": 6379 }, { "epoch": 0.98, "grad_norm": 2.694298363324352, "learning_rate": 2.8752589841092306e-08, "loss": 0.7603, "step": 6380 }, { "epoch": 0.98, "grad_norm": 2.7538631724452465, "learning_rate": 2.8378144550443852e-08, "loss": 0.8414, "step": 6381 }, { "epoch": 0.98, "grad_norm": 2.607625116690004, "learning_rate": 2.8006150004782352e-08, "loss": 0.7615, "step": 6382 }, { "epoch": 0.98, "grad_norm": 2.7452716377783775, "learning_rate": 2.7636606295534664e-08, "loss": 0.7256, "step": 6383 }, { "epoch": 0.98, "grad_norm": 2.6681791298014645, "learning_rate": 2.7269513513523692e-08, "loss": 0.7814, "step": 6384 }, { "epoch": 0.98, "grad_norm": 2.785266623894804, "learning_rate": 2.6904871748970607e-08, "loss": 0.6774, "step": 6385 }, { "epoch": 0.98, "grad_norm": 2.548359718667786, "learning_rate": 2.6542681091493715e-08, "loss": 0.6805, "step": 6386 }, { "epoch": 0.98, "grad_norm": 2.8727987933271812, "learning_rate": 2.6182941630109594e-08, "loss": 0.7931, "step": 6387 }, { "epoch": 0.98, "grad_norm": 2.954983354484794, "learning_rate": 2.5825653453233067e-08, "loss": 0.84, "step": 6388 }, { "epoch": 0.98, "grad_norm": 2.4906140254521496, "learning_rate": 2.547081664867501e-08, "loss": 0.6294, "step": 6389 }, { "epoch": 0.98, "grad_norm": 2.8461489930565773, "learning_rate": 2.511843130364233e-08, "loss": 0.8024, "step": 6390 }, { "epoch": 0.98, "grad_norm": 2.539588939846344, "learning_rate": 2.4768497504744637e-08, "loss": 0.7963, "step": 6391 }, { "epoch": 0.98, "grad_norm": 2.551724315283847, "learning_rate": 2.4421015337984244e-08, "loss": 0.7367, "step": 6392 }, { "epoch": 0.98, "grad_norm": 2.792982810540755, "learning_rate": 2.4075984888762838e-08, "loss": 0.7481, "step": 6393 }, { "epoch": 0.98, "grad_norm": 2.6340251459926645, "learning_rate": 2.3733406241878145e-08, "loss": 0.7786, "step": 6394 }, { "epoch": 0.98, "grad_norm": 2.795483266771458, "learning_rate": 2.3393279481527255e-08, "loss": 0.7786, "step": 6395 }, { "epoch": 0.98, "grad_norm": 2.7073715073593534, "learning_rate": 2.305560469130552e-08, "loss": 0.7669, "step": 6396 }, { "epoch": 0.98, "grad_norm": 2.5520780876011058, "learning_rate": 2.2720381954201008e-08, "loss": 0.6954, "step": 6397 }, { "epoch": 0.98, "grad_norm": 2.6048219054047697, "learning_rate": 2.2387611352603365e-08, "loss": 0.7811, "step": 6398 }, { "epoch": 0.98, "grad_norm": 2.6189944256832876, "learning_rate": 2.2057292968298284e-08, "loss": 0.7038, "step": 6399 }, { "epoch": 0.98, "grad_norm": 2.615322166675953, "learning_rate": 2.1729426882468596e-08, "loss": 0.7856, "step": 6400 }, { "epoch": 0.98, "grad_norm": 2.950085694256778, "learning_rate": 2.1404013175694292e-08, "loss": 0.7141, "step": 6401 }, { "epoch": 0.98, "grad_norm": 2.7197599076092187, "learning_rate": 2.1081051927953623e-08, "loss": 0.726, "step": 6402 }, { "epoch": 0.98, "grad_norm": 2.9021177208654376, "learning_rate": 2.0760543218621976e-08, "loss": 0.7622, "step": 6403 }, { "epoch": 0.98, "grad_norm": 2.5346673680250844, "learning_rate": 2.044248712646968e-08, "loss": 0.6864, "step": 6404 }, { "epoch": 0.98, "grad_norm": 2.7369348943349383, "learning_rate": 2.0126883729667534e-08, "loss": 0.7364, "step": 6405 }, { "epoch": 0.98, "grad_norm": 2.534941329247269, "learning_rate": 1.9813733105780163e-08, "loss": 0.741, "step": 6406 }, { "epoch": 0.98, "grad_norm": 2.9269037015104162, "learning_rate": 1.950303533177378e-08, "loss": 0.7941, "step": 6407 }, { "epoch": 0.98, "grad_norm": 2.6677188601438586, "learning_rate": 1.91947904840073e-08, "loss": 0.7969, "step": 6408 }, { "epoch": 0.98, "grad_norm": 2.5997357128251033, "learning_rate": 1.88889986382379e-08, "loss": 0.7151, "step": 6409 }, { "epoch": 0.98, "grad_norm": 2.7504245622344503, "learning_rate": 1.858565986962324e-08, "loss": 0.7679, "step": 6410 }, { "epoch": 0.98, "grad_norm": 2.626561601579103, "learning_rate": 1.8284774252713688e-08, "loss": 0.7941, "step": 6411 }, { "epoch": 0.98, "grad_norm": 2.771230928418999, "learning_rate": 1.7986341861458976e-08, "loss": 0.7155, "step": 6412 }, { "epoch": 0.98, "grad_norm": 2.696959350794206, "learning_rate": 1.7690362769205993e-08, "loss": 0.747, "step": 6413 }, { "epoch": 0.98, "grad_norm": 3.13649995719672, "learning_rate": 1.7396837048696547e-08, "loss": 0.8031, "step": 6414 }, { "epoch": 0.98, "grad_norm": 2.6578860908672857, "learning_rate": 1.710576477207293e-08, "loss": 0.803, "step": 6415 }, { "epoch": 0.98, "grad_norm": 2.844554639142442, "learning_rate": 1.6817146010871255e-08, "loss": 0.7004, "step": 6416 }, { "epoch": 0.98, "grad_norm": 2.5950797820771916, "learning_rate": 1.653098083602478e-08, "loss": 0.7316, "step": 6417 }, { "epoch": 0.98, "grad_norm": 2.939752922140131, "learning_rate": 1.6247269317868353e-08, "loss": 0.7377, "step": 6418 }, { "epoch": 0.98, "grad_norm": 2.654338826560342, "learning_rate": 1.596601152612731e-08, "loss": 0.8092, "step": 6419 }, { "epoch": 0.98, "grad_norm": 2.55037502648767, "learning_rate": 1.5687207529927472e-08, "loss": 0.6677, "step": 6420 }, { "epoch": 0.98, "grad_norm": 2.4745890974533937, "learning_rate": 1.54108573977918e-08, "loss": 0.6845, "step": 6421 }, { "epoch": 0.98, "grad_norm": 3.352244373978386, "learning_rate": 1.51369611976393e-08, "loss": 0.7089, "step": 6422 }, { "epoch": 0.98, "grad_norm": 2.67104638009709, "learning_rate": 1.4865518996786122e-08, "loss": 0.8284, "step": 6423 }, { "epoch": 0.98, "grad_norm": 2.9781450975243584, "learning_rate": 1.4596530861944458e-08, "loss": 0.7792, "step": 6424 }, { "epoch": 0.98, "grad_norm": 2.5664080284926336, "learning_rate": 1.432999685922365e-08, "loss": 0.8157, "step": 6425 }, { "epoch": 0.98, "grad_norm": 2.7769335025657664, "learning_rate": 1.4065917054132405e-08, "loss": 0.8154, "step": 6426 }, { "epoch": 0.98, "grad_norm": 2.649007252116273, "learning_rate": 1.3804291511572144e-08, "loss": 0.8174, "step": 6427 }, { "epoch": 0.98, "grad_norm": 2.926878045065561, "learning_rate": 1.3545120295843651e-08, "loss": 0.7972, "step": 6428 }, { "epoch": 0.98, "grad_norm": 2.8927224247284036, "learning_rate": 1.3288403470643751e-08, "loss": 0.7388, "step": 6429 }, { "epoch": 0.98, "grad_norm": 2.7844682550817415, "learning_rate": 1.3034141099066422e-08, "loss": 0.7739, "step": 6430 }, { "epoch": 0.98, "grad_norm": 2.6199308848828426, "learning_rate": 1.2782333243601675e-08, "loss": 0.8188, "step": 6431 }, { "epoch": 0.98, "grad_norm": 2.5090276146240362, "learning_rate": 1.2532979966138892e-08, "loss": 0.7832, "step": 6432 }, { "epoch": 0.98, "grad_norm": 2.7804406982385927, "learning_rate": 1.2286081327959055e-08, "loss": 0.6882, "step": 6433 }, { "epoch": 0.98, "grad_norm": 3.2823754032742363, "learning_rate": 1.2041637389745842e-08, "loss": 0.8219, "step": 6434 }, { "epoch": 0.98, "grad_norm": 2.592159073695726, "learning_rate": 1.1799648211574533e-08, "loss": 0.8091, "step": 6435 }, { "epoch": 0.99, "grad_norm": 2.667384164685074, "learning_rate": 1.1560113852919774e-08, "loss": 0.7555, "step": 6436 }, { "epoch": 0.99, "grad_norm": 2.6250053411416956, "learning_rate": 1.1323034372653364e-08, "loss": 0.7027, "step": 6437 }, { "epoch": 0.99, "grad_norm": 2.558165207717662, "learning_rate": 1.1088409829042023e-08, "loss": 0.6945, "step": 6438 }, { "epoch": 0.99, "grad_norm": 2.554335993292053, "learning_rate": 1.0856240279750741e-08, "loss": 0.7743, "step": 6439 }, { "epoch": 0.99, "grad_norm": 2.6211979677469794, "learning_rate": 1.0626525781838316e-08, "loss": 0.6903, "step": 6440 }, { "epoch": 0.99, "grad_norm": 2.5971448988828993, "learning_rate": 1.0399266391764029e-08, "loss": 0.7464, "step": 6441 }, { "epoch": 0.99, "grad_norm": 2.5474481779950495, "learning_rate": 1.0174462165380983e-08, "loss": 0.8297, "step": 6442 }, { "epoch": 0.99, "grad_norm": 2.4791215754553084, "learning_rate": 9.952113157940534e-09, "loss": 0.7697, "step": 6443 }, { "epoch": 0.99, "grad_norm": 2.479645255730251, "learning_rate": 9.732219424087863e-09, "loss": 0.7865, "step": 6444 }, { "epoch": 0.99, "grad_norm": 2.620899042191569, "learning_rate": 9.514781017869734e-09, "loss": 0.6784, "step": 6445 }, { "epoch": 0.99, "grad_norm": 2.7710163417045157, "learning_rate": 9.299797992724514e-09, "loss": 0.7089, "step": 6446 }, { "epoch": 0.99, "grad_norm": 2.6119013634252335, "learning_rate": 9.087270401488823e-09, "loss": 0.7556, "step": 6447 }, { "epoch": 0.99, "grad_norm": 2.5940185225330477, "learning_rate": 8.877198296396438e-09, "loss": 0.6927, "step": 6448 }, { "epoch": 0.99, "grad_norm": 2.5758034981359446, "learning_rate": 8.66958172907717e-09, "loss": 0.705, "step": 6449 }, { "epoch": 0.99, "grad_norm": 3.4943602171675057, "learning_rate": 8.464420750556868e-09, "loss": 0.8198, "step": 6450 }, { "epoch": 0.99, "grad_norm": 2.5946902014802995, "learning_rate": 8.26171541125964e-09, "loss": 0.7201, "step": 6451 }, { "epoch": 0.99, "grad_norm": 2.5390879531895627, "learning_rate": 8.061465761003417e-09, "loss": 0.8203, "step": 6452 }, { "epoch": 0.99, "grad_norm": 2.982939394034132, "learning_rate": 7.863671849004384e-09, "loss": 0.8149, "step": 6453 }, { "epoch": 0.99, "grad_norm": 2.6976834349346293, "learning_rate": 7.668333723874765e-09, "loss": 0.8409, "step": 6454 }, { "epoch": 0.99, "grad_norm": 2.7794087904559786, "learning_rate": 7.475451433623936e-09, "loss": 0.7434, "step": 6455 }, { "epoch": 0.99, "grad_norm": 2.7798006349430144, "learning_rate": 7.2850250256562e-09, "loss": 0.8585, "step": 6456 }, { "epoch": 0.99, "grad_norm": 2.749781684197471, "learning_rate": 7.097054546773008e-09, "loss": 0.7369, "step": 6457 }, { "epoch": 0.99, "grad_norm": 2.754818961661538, "learning_rate": 6.911540043171849e-09, "loss": 0.7442, "step": 6458 }, { "epoch": 0.99, "grad_norm": 2.4299632918254526, "learning_rate": 6.728481560448474e-09, "loss": 0.5999, "step": 6459 }, { "epoch": 0.99, "grad_norm": 2.691427186417734, "learning_rate": 6.54787914359134e-09, "loss": 0.7845, "step": 6460 }, { "epoch": 0.99, "grad_norm": 2.47678081900891, "learning_rate": 6.369732836989384e-09, "loss": 0.7412, "step": 6461 }, { "epoch": 0.99, "grad_norm": 2.7436779125657464, "learning_rate": 6.194042684425361e-09, "loss": 0.7747, "step": 6462 }, { "epoch": 0.99, "grad_norm": 2.8750941731143174, "learning_rate": 6.0208087290780645e-09, "loss": 0.7888, "step": 6463 }, { "epoch": 0.99, "grad_norm": 2.770040007515961, "learning_rate": 5.850031013524549e-09, "loss": 0.6909, "step": 6464 }, { "epoch": 0.99, "grad_norm": 2.6684774674425613, "learning_rate": 5.681709579737904e-09, "loss": 0.7019, "step": 6465 }, { "epoch": 0.99, "grad_norm": 3.0274878532422287, "learning_rate": 5.515844469085041e-09, "loss": 0.7444, "step": 6466 }, { "epoch": 0.99, "grad_norm": 2.5402296932243495, "learning_rate": 5.352435722332238e-09, "loss": 0.6953, "step": 6467 }, { "epoch": 0.99, "grad_norm": 2.6199879604104117, "learning_rate": 5.191483379639595e-09, "loss": 0.7152, "step": 6468 }, { "epoch": 0.99, "grad_norm": 2.5531274187025073, "learning_rate": 5.0329874805654656e-09, "loss": 0.7096, "step": 6469 }, { "epoch": 0.99, "grad_norm": 2.634831711496866, "learning_rate": 4.876948064064246e-09, "loss": 0.7461, "step": 6470 }, { "epoch": 0.99, "grad_norm": 2.3984961627115484, "learning_rate": 4.723365168485261e-09, "loss": 0.7232, "step": 6471 }, { "epoch": 0.99, "grad_norm": 2.4114883364205943, "learning_rate": 4.572238831574982e-09, "loss": 0.717, "step": 6472 }, { "epoch": 0.99, "grad_norm": 2.5541673170994525, "learning_rate": 4.4235690904759206e-09, "loss": 0.8181, "step": 6473 }, { "epoch": 0.99, "grad_norm": 2.7352721716539095, "learning_rate": 4.277355981727738e-09, "loss": 0.6996, "step": 6474 }, { "epoch": 0.99, "grad_norm": 2.7112077492868942, "learning_rate": 4.133599541265021e-09, "loss": 0.7444, "step": 6475 }, { "epoch": 0.99, "grad_norm": 2.8709744528710486, "learning_rate": 3.992299804418398e-09, "loss": 0.7478, "step": 6476 }, { "epoch": 0.99, "grad_norm": 2.830825498249224, "learning_rate": 3.853456805915645e-09, "loss": 0.8736, "step": 6477 }, { "epoch": 0.99, "grad_norm": 2.7189102796459337, "learning_rate": 3.7170705798816875e-09, "loss": 0.717, "step": 6478 }, { "epoch": 0.99, "grad_norm": 3.2342398789984927, "learning_rate": 3.583141159834158e-09, "loss": 0.801, "step": 6479 }, { "epoch": 0.99, "grad_norm": 2.80624204172367, "learning_rate": 3.4516685786922798e-09, "loss": 0.8927, "step": 6480 }, { "epoch": 0.99, "grad_norm": 2.8881355207422845, "learning_rate": 3.322652868764653e-09, "loss": 0.7925, "step": 6481 }, { "epoch": 0.99, "grad_norm": 2.833888014886496, "learning_rate": 3.196094061762578e-09, "loss": 0.8012, "step": 6482 }, { "epoch": 0.99, "grad_norm": 2.6885761878683825, "learning_rate": 3.071992188790063e-09, "loss": 0.8459, "step": 6483 }, { "epoch": 0.99, "grad_norm": 2.6920429486726296, "learning_rate": 2.9503472803471546e-09, "loss": 0.7366, "step": 6484 }, { "epoch": 0.99, "grad_norm": 2.793627620980447, "learning_rate": 2.831159366331049e-09, "loss": 0.7325, "step": 6485 }, { "epoch": 0.99, "grad_norm": 2.4166025338417283, "learning_rate": 2.7144284760349804e-09, "loss": 0.6311, "step": 6486 }, { "epoch": 0.99, "grad_norm": 2.788670358621249, "learning_rate": 2.600154638148222e-09, "loss": 0.7814, "step": 6487 }, { "epoch": 0.99, "grad_norm": 2.7429804605417925, "learning_rate": 2.488337880754976e-09, "loss": 0.8176, "step": 6488 }, { "epoch": 0.99, "grad_norm": 2.699912986291421, "learning_rate": 2.378978231338813e-09, "loss": 0.7569, "step": 6489 }, { "epoch": 0.99, "grad_norm": 2.6965672451161726, "learning_rate": 2.272075716774902e-09, "loss": 0.711, "step": 6490 }, { "epoch": 0.99, "grad_norm": 2.637709653615361, "learning_rate": 2.167630363338891e-09, "loss": 0.786, "step": 6491 }, { "epoch": 0.99, "grad_norm": 2.7864743894646486, "learning_rate": 2.065642196699136e-09, "loss": 0.7065, "step": 6492 }, { "epoch": 0.99, "grad_norm": 2.5309378266446063, "learning_rate": 1.966111241922253e-09, "loss": 0.7645, "step": 6493 }, { "epoch": 0.99, "grad_norm": 2.9183202809954496, "learning_rate": 1.8690375234697854e-09, "loss": 0.7266, "step": 6494 }, { "epoch": 0.99, "grad_norm": 2.8177372518135932, "learning_rate": 1.7744210651993167e-09, "loss": 0.7258, "step": 6495 }, { "epoch": 0.99, "grad_norm": 2.797322429322555, "learning_rate": 1.6822618903655773e-09, "loss": 0.7366, "step": 6496 }, { "epoch": 0.99, "grad_norm": 2.6958226240614174, "learning_rate": 1.592560021618228e-09, "loss": 0.7854, "step": 6497 }, { "epoch": 0.99, "grad_norm": 2.5733350687801146, "learning_rate": 1.5053154810040772e-09, "loss": 0.6753, "step": 6498 }, { "epoch": 0.99, "grad_norm": 2.8514235606033753, "learning_rate": 1.4205282899659722e-09, "loss": 0.7067, "step": 6499 }, { "epoch": 0.99, "grad_norm": 3.0375905676960664, "learning_rate": 1.3381984693405793e-09, "loss": 0.7599, "step": 6500 }, { "epoch": 1.0, "grad_norm": 3.483747166388334, "learning_rate": 1.2583260393628226e-09, "loss": 0.7857, "step": 6501 }, { "epoch": 1.0, "grad_norm": 2.8775232736931855, "learning_rate": 1.1809110196636663e-09, "loss": 0.8218, "step": 6502 }, { "epoch": 1.0, "grad_norm": 3.0171976204757547, "learning_rate": 1.1059534292690022e-09, "loss": 0.7009, "step": 6503 }, { "epoch": 1.0, "grad_norm": 2.5790803766513988, "learning_rate": 1.0334532866007608e-09, "loss": 0.7804, "step": 6504 }, { "epoch": 1.0, "grad_norm": 2.8344092373568186, "learning_rate": 9.634106094791317e-10, "loss": 0.7493, "step": 6505 }, { "epoch": 1.0, "grad_norm": 2.5103505676785516, "learning_rate": 8.958254151170131e-10, "loss": 0.7996, "step": 6506 }, { "epoch": 1.0, "grad_norm": 2.6642999642212963, "learning_rate": 8.30697720126672e-10, "loss": 0.6592, "step": 6507 }, { "epoch": 1.0, "grad_norm": 2.861760072908654, "learning_rate": 7.680275405130833e-10, "loss": 0.749, "step": 6508 }, { "epoch": 1.0, "grad_norm": 2.5797148250536144, "learning_rate": 7.078148916783711e-10, "loss": 0.7418, "step": 6509 }, { "epoch": 1.0, "grad_norm": 2.572541953632293, "learning_rate": 6.500597884229188e-10, "loss": 0.7554, "step": 6510 }, { "epoch": 1.0, "grad_norm": 2.605496462403277, "learning_rate": 5.947622449409274e-10, "loss": 0.8051, "step": 6511 }, { "epoch": 1.0, "grad_norm": 2.316767161854669, "learning_rate": 5.419222748226372e-10, "loss": 0.7009, "step": 6512 }, { "epoch": 1.0, "grad_norm": 2.547132182034208, "learning_rate": 4.915398910532166e-10, "loss": 0.8116, "step": 6513 }, { "epoch": 1.0, "grad_norm": 2.6269179574986836, "learning_rate": 4.436151060183136e-10, "loss": 0.7224, "step": 6514 }, { "epoch": 1.0, "grad_norm": 2.8338788225773315, "learning_rate": 3.9814793149295373e-10, "loss": 0.7944, "step": 6515 }, { "epoch": 1.0, "grad_norm": 2.8067035466603394, "learning_rate": 3.5513837865486236e-10, "loss": 0.8032, "step": 6516 }, { "epoch": 1.0, "grad_norm": 2.550073722813949, "learning_rate": 3.145864580722524e-10, "loss": 0.7207, "step": 6517 }, { "epoch": 1.0, "grad_norm": 2.636265469611505, "learning_rate": 2.764921797138165e-10, "loss": 0.7546, "step": 6518 }, { "epoch": 1.0, "grad_norm": 2.616310209319721, "learning_rate": 2.4085555293984484e-10, "loss": 0.8064, "step": 6519 }, { "epoch": 1.0, "grad_norm": 2.817876730596737, "learning_rate": 2.0767658650999723e-10, "loss": 0.7837, "step": 6520 }, { "epoch": 1.0, "grad_norm": 2.4123576010128955, "learning_rate": 1.7695528857886169e-10, "loss": 0.7163, "step": 6521 }, { "epoch": 1.0, "grad_norm": 2.801149751350197, "learning_rate": 1.4869166669595482e-10, "loss": 0.791, "step": 6522 }, { "epoch": 1.0, "grad_norm": 2.669239979861841, "learning_rate": 1.2288572780905227e-10, "loss": 0.6997, "step": 6523 }, { "epoch": 1.0, "grad_norm": 3.0859647495600746, "learning_rate": 9.95374782586378e-11, "loss": 0.7652, "step": 6524 }, { "epoch": 1.0, "grad_norm": 2.8567736532925334, "learning_rate": 7.86469237845644e-11, "loss": 0.8433, "step": 6525 }, { "epoch": 1.0, "grad_norm": 2.95683604369921, "learning_rate": 6.021406952161357e-11, "loss": 0.8292, "step": 6526 }, { "epoch": 1.0, "grad_norm": 2.5110923123790236, "learning_rate": 4.423891999838503e-11, "loss": 0.7575, "step": 6527 }, { "epoch": 1.0, "grad_norm": 2.3604571293366843, "learning_rate": 3.072147914284784e-11, "loss": 0.6375, "step": 6528 }, { "epoch": 1.0, "grad_norm": 2.6002150822854406, "learning_rate": 1.966175027567907e-11, "loss": 0.6676, "step": 6529 }, { "epoch": 1.0, "grad_norm": 2.781193187571927, "learning_rate": 1.1059736115814901e-11, "loss": 0.7323, "step": 6530 }, { "epoch": 1.0, "grad_norm": 2.670757867546929, "learning_rate": 4.915438777119974e-12, "loss": 0.7783, "step": 6531 }, { "epoch": 1.0, "grad_norm": 2.786026911255166, "learning_rate": 1.2288597694976034e-12, "loss": 0.6062, "step": 6532 }, { "epoch": 1.0, "grad_norm": 4.022792001865195, "learning_rate": 0.0, "loss": 0.8392, "step": 6533 }, { "epoch": 1.0, "step": 6533, "total_flos": 6195132391227392.0, "train_loss": 0.8881614911151154, "train_runtime": 203520.9615, "train_samples_per_second": 4.109, "train_steps_per_second": 0.032 } ], "logging_steps": 1.0, "max_steps": 6533, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 400, "total_flos": 6195132391227392.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }